diff --git a/.gitignore b/.gitignore index 7810562acd61a05ef0d9e39e4e69cf9e8b2a1fde..8de22b3bb0085a0848762c7d073404b89461a636 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,8 @@ deploy *.suo .vs/ .vscode/ +*.blg +*.bbl +*.pyc +/out/ +/doc/_minted-software-manual/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..e578adc74d6e0b60ad709004c5a87a64ac98b960 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,59 @@ +stages: + - build + +.build_template: + stage: build + script: + - make realclean + - make all + only: + refs: + - master + - merge_requests + variables: + - $CI_PROJECT_URL == 'https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM' + - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'master' + +.build_template_linux: + extends: .build_template + script: + - make realclean + - make all + - make realclean + - make linuxbuild enable-tracing=true + +#build_macos: +# extends: .build_template +# tags: +# - macos + +build_ubuntu1604: + extends: .build_template_linux + tags: + - ubuntu1604 + +build_ubuntu1804: + extends: .build_template_linux + tags: + - ubuntu1804 + +build_ubuntu1804-gcc8: + extends: .build_template_linux + script: + - make realclean + - make all toolset=gcc-8 + - make realclean + - make linuxbuild enable-tracing=true toolset=gcc-8 + tags: + - ubuntu1804-gcc8 + +build_vc191x: + extends: .build_template + tags: + - vc191x + +build_vc192x: + extends: .build_template + tags: + - vc192x + diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b9737cc9885c4bda03c5cd1bced0182d9d3d0d3..574707d86a77b06cee8e523cf691a8ca388bcb3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ if( CMAKE_SYSTEM_NAME STREQUAL "Linux" ) endif() set( EXTENSION_360_VIDEO OFF CACHE BOOL "If EXTENSION_360_VIDEO is on, 360Lib will be added" ) +set( EXTENSION_HDRTOOLS OFF CACHE BOOL "If EXTENSION_HDRTOOLS is on, HDRLib will be added" ) set( SET_ENABLE_TRACING OFF CACHE BOOL "Set ENABLE_TRACING as a compiler flag" 
) set( ENABLE_TRACING OFF CACHE BOOL "If SET_ENABLE_TRACING is on, it will be set to this value" ) @@ -112,6 +113,7 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) if( USE_ADDRESS_SANITIZER ) # add compile options add_compile_options( "-fsanitize=address" ) + add_link_options( "-fsanitize=address" ) endif() endif() @@ -136,6 +138,9 @@ if( EXTENSION_360_VIDEO ) add_subdirectory( "source/Lib/Lib360" ) add_subdirectory( "source/Lib/AppEncHelper360" ) endif() +if ( EXTENSION_HDRTOOLS ) + add_subdirectory( "source/Lib/HDRLib") +endif() add_subdirectory( "source/Lib/DecoderAnalyserLib" ) add_subdirectory( "source/Lib/DecoderLib" ) add_subdirectory( "source/Lib/EncoderLib" ) @@ -146,6 +151,7 @@ add_subdirectory( "source/App/DecoderApp" ) add_subdirectory( "source/App/EncoderApp" ) add_subdirectory( "source/App/SEIRemovalApp" ) add_subdirectory( "source/App/Parcat" ) +add_subdirectory( "source/App/StreamMergeApp" ) if( EXTENSION_360_VIDEO ) add_subdirectory( "source/App/utils/360ConvertApp" ) endif() diff --git a/COPYING b/COPYING index 0227899c94523ffb16b26e6a85ff55add99123e4..a328b7da34cb542dd0137b58b0380080de67f97a 100644 --- a/COPYING +++ b/COPYING @@ -3,7 +3,7 @@ License, included below. This software may be subject to other third party and contributor rights, including patent rights, and no such rights are granted under this license.  -Copyright (c) 2010-2019, ITU/ISO/IEC +Copyright (c) 2010-2020, ITU/ISO/IEC All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/Makefile b/Makefile index d61744bed9a7926118b9107c9e59384d12a3126d..c48915825cb53879040adfad525f555ed0770dbf 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ BUILD_SCRIPT := $(CURDIR)/cmake/CMakeBuild/bin/cmake.py # TARGETS := CommonLib DecoderAnalyserApp DecoderAnalyserLib DecoderApp DecoderLib -TARGETS += EncoderApp EncoderLib Utilities SEIRemovalApp +TARGETS += EncoderApp EncoderLib Utilities SEIRemovalApp StreamMergeApp ifeq ($(OS),Windows_NT) ifneq ($(MSYSTEM),) diff --git a/README.md b/README.md index b695964769c896c17c0b5f6690e79da19d7c0ca3..d2853b793a91c0f9554b6f081da57508b242888e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,21 @@ -How to build VTM -================ +VTM reference software for VVC +============================== + +This software package is the reference software for Versatile Video Coding (VVC). The reference software includes both encoder and decoder functionality. + +Reference software is useful in aiding users of a video coding standard to establish and test conformance and interoperability, and to educate users and demonstrate the capabilities of the standard. For these purposes, this software is provided as an aid for the study and implementation of Versatile Video Coding. + +The software has been jointly developed by the ITU-T Video Coding Experts Group (VCEG, Question 6 of ITU-T Study Group 16) and the ISO/IEC Moving Picture Experts Group (MPEG, Working Group 11 of Subcommittee 29 of ISO/IEC Joint Technical Committee 1). + +A software manual, which contains usage instructions, can be found in the "doc" subdirectory of this software package. + +Build instructions +================== + +The CMake tool is used to create platform-specific build files. + +Although CMake may be able to generate 32-bit binaries, **it is generally suggested to build 64-bit binaries**. 
32-bit binaries are not able to access more than 2GB of RAM, which will not be sufficient for coding larger image formats. Building in 32-bit environments is not tested and will not be supported. -The software uses CMake to create platform-specific build files. Build instructions for plain CMake (suggested) ---------------------------------------------- @@ -21,20 +35,38 @@ mkdir build Use one of the following CMake commands, based on your platform. Feel free to change the commands to satisfy your needs. -**Windows Visual Studio 2015 64 Bit:** +**Windows Visual Studio 2015/17/19 64 Bit:** + +Use the proper generator string for generating Visual Studio files, e.g. for VS 2015: + ```bash cd build cmake .. -G "Visual Studio 14 2015 Win64" ``` + Then open the generated solution file in MS Visual Studio. +For VS 2017 use "Visual Studio 15 2017 Win64", for VS 2019 use "Visual Studio 16 2019". + +Visual Studio 2019 also allows you to open the CMake directory directly. Choose "File->Open->CMake" for this option. + **macOS Xcode:** + +For generating an Xcode workspace type: ```bash cd build cmake .. -G "Xcode" ``` Then open the generated work space in Xcode. +For generating Makefiles with optional non-default compilers, use the following commands: + +```bash +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9 +``` +In this example the brew installed GCC 9 is used for a release build. + **Linux** For generating Linux Release Makefile: @@ -68,16 +100,21 @@ To use the default system compiler simply call: ```bash make all ``` -For MSYS2 and MinGW: Open an MSYS MinGW 64-Bit terminal and change into the root directory of this project. + + +**MSYS2 and MinGW (Windows)** + +**Note:** Build files for MSYS MinGW were added on request. The build platform is not regularly tested and can't be supported. + +Open an MSYS MinGW 64-Bit terminal and change into the root directory of this project. 
Call: ```bash make all toolset=gcc ``` +The following tools need to be installed for MSYS2 and MinGW: -Tool Installation on Windows ----------------------------- Download CMake: http://www.cmake.org/ and install it. Python and GnuWin32 are not mandatory, but they simplify the build process for the user. diff --git a/cfg/444/yuv444.cfg b/cfg/444/yuv444.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c64afd759ff18af972a5a350252607a7b61815d6 --- /dev/null +++ b/cfg/444/yuv444.cfg @@ -0,0 +1 @@ +BDPCM: 2 diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg index 80c2ee8475c742931beeb0d4d6cdec678d865318..fc8224aa6b470a2436a133437c31a55080b73ca8 100644 --- a/cfg/encoder_intra_vtm.cfg +++ b/cfg/encoder_intra_vtm.cfg @@ -3,7 +3,7 @@ BitstreamFile : str.bin ReconFile : rec.yuv #======== Profile ================ -Profile : next +Profile : auto #======== Unit definition ================ MaxCUWidth : 64 # Maximum coding unit width in pixel @@ -48,37 +48,20 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) -#============ Slices ================ -SliceMode : 0 # 0: Disable all slice options. - # 1: Enforce maximum number of LCU in an slice, - # 2: Enforce maximum number of bytes in an 'slice' - # 3: Enforce maximum number of tiles in a slice -SliceArgument : 1500 # Argument for 'SliceMode'. - # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice. - # If SliceMode==2 it represents max. bytes per slice. - # If SliceMode==3 it represents max. tiles per slice. - -LFCrossSliceBoundaryFlag : 1 # In-loop filtering, including ALF and DB, is across or not across slice boundary. - # 0:not across, 1: across - -#============ PCM ================ -PCMEnabledFlag : 0 # 0: No PCM mode -PCMLog2MaxSize : 5 # Log2 of maximum PCM block size. -PCMLog2MinSize : 3 # Log2 of minimum PCM block size. 
-PCMInputBitDepthFlag : 1 # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. -PCMFilterDisableFlag : 0 # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples. - -#============ Lossless ================ -TransquantBypassEnable : 0 # Value of PPS flag. -CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled +#============ Tiles / Slices ================ +EnablePicPartitioning : 0 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) #============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 1 -CrQpOffset : 1 +CbQpOffset : 0 +CrQpOffset : 0 +SameCQPTablesForAllChroma : 1 +QpInValCb : 1 31 43 +QpOutValCb : 1 32 41 TemporalSubsampleRatio : 8 +ReWriteParamSets : 1 #============ NEXT ==================== # General @@ -89,14 +72,16 @@ DualITree : 1 # separate partitioning of luma and chroma MinQTLumaISlice : 8 MinQTChromaISlice : 4 MinQTNonISlice : 8 -MaxBTDepth : 3 -MaxBTDepthISliceL : 3 -MaxBTDepthISliceC : 3 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 MTS : 1 -MTSIntraMaxCand : 3 +MTSIntraMaxCand : 4 MTSInterMaxCand : 4 SBT : 1 +LFNST : 1 +ISP : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -104,16 +89,25 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 1 ALF : 1 -IBC : 0 # turned off in CTC +IBC : 0 # turned off in CTC AllowDisFracMMVD : 1 AffineAmvr : 0 -LumaReshapeEnable : 1 # luma reshaping. 
0: disable 1:enable +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 1 # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP +LMCSOffset : 2 # chroma residual scaling offset +MRL : 1 +MIP : 1 +JointCbCr : 1 # joint coding of chroma residuals (if available): 0: disable, 1: enable +ChromaTS : 1 # Fast tools PBIntraFast : 1 ISPFast : 1 FastMrg : 1 AMaxBT : 1 +FastMIP : 1 +FastLFNST : 1 # Encoder optimization tools AffineAmvrEncOpt : 0 diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index 6d345e4c713025c432e061cee682f32d7c7eb320..e0a5088970fe37e903e67447a2c09bc95910ffdb 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -3,7 +3,7 @@ BitstreamFile : str.bin ReconFile : rec.yuv #======== Profile ================ -Profile : next +Profile : auto #======== Unit definition ================ MaxCUWidth : 64 # Maximum coding unit width in pixel @@ -13,15 +13,19 @@ MaxPartitionDepth : 4 # Maximum coding unit depth #======== Coding Structure ============= IntraPeriod : -1 # Period of I-Frame ( -1 = only first) DecodingRefreshType : 0 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI -GOPSize : 4 # GOP Size (number of B slice = GOPSize-1) +GOPSize : 8 # GOP Size (number of B slice = GOPSize-1) IntraQPOffset : -1 LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled -# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures predict deltaRPS #ref_idcs reference idcs -Frame1: P 1 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -5 -9 -13 0 -Frame2: P 2 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -2 -6 -10 1 -1 5 1 1 1 0 1 -Frame3: P 3 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -3 -7 -11 1 -1 5 0 1 1 1 1 -Frame4: P 4 1 0.0 0.0 0 0 1.0 0 0 0 4 4 -1 -4 -8 -12 1 -1 5 0 
1 1 1 1 +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: P 1 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 9 17 25 0 0 +Frame2: P 2 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 2 10 18 0 0 +Frame3: P 3 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 3 11 19 0 0 +Frame4: P 4 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 4 12 20 0 0 +Frame5: P 5 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 5 13 21 0 0 +Frame6: P 6 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 6 14 22 0 0 +Frame7: P 7 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 7 15 23 0 0 +Frame8: P 8 1 0.0 0.0 0 0 1.0 0 0 0 4 4 1 8 16 24 0 0 #=========== Motion Search ============= FastSearch : 1 # 0:Full search 1:TZ search @@ -56,29 +60,13 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) -#============ Slices ================ -SliceMode : 0 # 0: Disable all slice options. - # 1: Enforce maximum number of LCU in an slice, - # 2: Enforce maximum number of bytes in an 'slice' - # 3: Enforce maximum number of tiles in a slice -SliceArgument : 1500 # Argument for 'SliceMode'. - # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice. - # If SliceMode==2 it represents max. bytes per slice. - # If SliceMode==3 it represents max. tiles per slice. - -LFCrossSliceBoundaryFlag : 1 # In-loop filtering, including ALF and DB, is across or not across slice boundary. - # 0:not across, 1: across - -#============ PCM ================ -PCMEnabledFlag : 0 # 0: No PCM mode -PCMLog2MaxSize : 5 # Log2 of maximum PCM block size. -PCMLog2MinSize : 3 # Log2 of minimum PCM block size. -PCMInputBitDepthFlag : 1 # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. -PCMFilterDisableFlag : 0 # 0: Enable loop filtering on I_PCM samples. 
1: Disable loop filtering on I_PCM samples. - -#============ Lossless ================ -TransquantBypassEnable : 0 # Value of PPS flag. -CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterStrengthFrame4 : 0.4 # Enable filter at every 4th frame with strength + +#============ Tiles / Slices ================ +EnablePicPartitioning : 0 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) #============ Rate Control ====================== RateControl : 0 # Rate control: enable rate control @@ -92,9 +80,12 @@ RCForceIntraQP : 0 # Rate control: force int #============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 1 -CrQpOffset : 1 - +CbQpOffset : 0 +CrQpOffset : 0 +SameCQPTablesForAllChroma : 1 +QpInValCb : 32 44 +QpOutValCb : 32 41 +ReWriteParamSets : 1 #============ NEXT ==================== # General @@ -105,14 +96,15 @@ DualITree : 1 # separate partitioning of luma and chroma MinQTLumaISlice : 8 MinQTChromaISlice : 4 MinQTNonISlice : 8 -MaxBTDepth : 3 -MaxBTDepthISliceL : 3 -MaxBTDepthISliceC : 3 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 SBT : 1 +ISP : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -120,22 +112,34 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 1 ALF : 1 -MHIntra : 1 +CIIP : 1 IBC : 0 # turned off in CTC AllowDisFracMMVD : 1 AffineAmvr : 0 -LumaReshapeEnable : 1 # luma reshaping. 
0: disable 1:enable +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 2 # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP +LMCSOffset : 1 # chroma residual scaling offset +MRL : 1 +MIP : 0 +JointCbCr : 1 # joint coding of chroma residuals (if available): 0: disable, 1: enable +PROF : 1 +PPSorSliceMode : 3 +ChromaTS : 1 # Fast tools PBIntraFast : 1 -ISPFast : 1 +ISPFast : 0 FastMrg : 1 AMaxBT : 1 +FastMIP : 0 +FastLocalDualTreeMode : 2 # Encoder optimization tools AffineAmvrEncOpt : 0 - +MmvdDisNum : 6 ### DO NOT ADD ANYTHING BELOW THIS LINE ### ### DO NOT DELETE THE EMPTY LINE BELOW ### + diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index 07233c18980035d3eb60cdb6a464c17e2b115899..2b449a99efbcb37256ed42fe5b7420dbf146e14a 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -3,7 +3,7 @@ BitstreamFile : str.bin ReconFile : rec.yuv #======== Profile ================ -Profile : next +Profile : auto #======== Unit definition ================ MaxCUWidth : 64 # Maximum coding unit width in pixel @@ -13,15 +13,19 @@ MaxPartitionDepth : 4 # Maximum coding unit depth #======== Coding Structure ============= IntraPeriod : -1 # Period of I-Frame ( -1 = only first) DecodingRefreshType : 0 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI -GOPSize : 4 # GOP Size (number of B slice = GOPSize-1) +GOPSize : 8 # GOP Size (number of B slice = GOPSize-1) IntraQPOffset : -1 LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled -# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures predict deltaRPS #ref_idcs reference idcs -Frame1: B 1 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -5 -9 -13 0 -Frame2: B 2 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -2 -6 -10 
1 -1 5 1 1 1 0 1 -Frame3: B 3 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 -1 -3 -7 -11 1 -1 5 0 1 1 1 1 -Frame4: B 4 1 0.0 0.0 0 0 1.0 0 0 0 4 4 -1 -4 -8 -12 1 -1 5 0 1 1 1 1 +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 1 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 9 17 25 4 4 1 9 17 25 +Frame2: B 2 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 2 10 18 4 4 1 2 10 18 +Frame3: B 3 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 3 11 19 4 4 1 3 11 19 +Frame4: B 4 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 4 12 20 4 4 1 4 12 20 +Frame5: B 5 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 5 13 21 4 4 1 5 13 21 +Frame6: B 6 4 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 6 14 22 4 4 1 6 14 22 +Frame7: B 7 5 -6.5 0.2590 0 0 1.0 0 0 0 4 4 1 7 15 23 4 4 1 7 15 23 +Frame8: B 8 1 0.0 0.0 0 0 1.0 0 0 0 4 4 1 8 16 24 4 4 1 8 16 24 #=========== Motion Search ============= FastSearch : 1 # 0:Full search 1:TZ search @@ -56,29 +60,13 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) -#============ Slices ================ -SliceMode : 0 # 0: Disable all slice options. - # 1: Enforce maximum number of LCU in an slice, - # 2: Enforce maximum number of bytes in an 'slice' - # 3: Enforce maximum number of tiles in a slice -SliceArgument : 1500 # Argument for 'SliceMode'. - # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice. - # If SliceMode==2 it represents max. bytes per slice. - # If SliceMode==3 it represents max. tiles per slice. - -LFCrossSliceBoundaryFlag : 1 # In-loop filtering, including ALF and DB, is across or not across slice boundary. - # 0:not across, 1: across - -#============ PCM ================ -PCMEnabledFlag : 0 # 0: No PCM mode -PCMLog2MaxSize : 5 # Log2 of maximum PCM block size. 
-PCMLog2MinSize : 3 # Log2 of minimum PCM block size. -PCMInputBitDepthFlag : 1 # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. -PCMFilterDisableFlag : 0 # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples. - -#============ Lossless ================ -TransquantBypassEnable : 0 # Value of PPS flag. -CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 0 # Enable/disable reading future frames +TemporalFilterStrengthFrame4 : 0.4 # Enable filter at every 4th frame with strength + +#============ Tiles / Slices ================ +EnablePicPartitioning : 0 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) #============ Rate Control ====================== RateControl : 0 # Rate control: enable rate control @@ -92,9 +80,12 @@ RCForceIntraQP : 0 # Rate control: force int #============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 1 -CrQpOffset : 1 - +CbQpOffset : 0 +CrQpOffset : 0 +SameCQPTablesForAllChroma : 1 +QpInValCb : 32 44 +QpOutValCb : 32 41 +ReWriteParamSets : 1 #============ NEXT ==================== # General @@ -105,14 +96,16 @@ DualITree : 1 # separate partitioning of luma and chroma MinQTLumaISlice : 8 MinQTChromaISlice : 4 MinQTNonISlice : 8 -MaxBTDepth : 3 -MaxBTDepthISliceL : 3 -MaxBTDepthISliceC : 3 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 MTS : 1 MTSIntraMaxCand : 3 MTSInterMaxCand : 4 SBT : 1 +ISP : 1 +MMVD : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -120,25 +113,37 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 1 ALF : 1 -GBi : 1 -GBiFast : 1 -MHIntra : 1 +BCW : 1 +BcwFast : 1 +CIIP : 1 Triangle : 1 IBC : 0 # turned off in CTC 
AllowDisFracMMVD : 1 AffineAmvr : 0 -LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 2 # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP +LMCSOffset : 1 # chroma residual scaling offset +MRL : 1 +MIP : 0 +JointCbCr : 1 # joint coding of chroma residuals (if available): 0: disable, 1: enable +PROF : 1 +PPSorSliceMode : 2 +ChromaTS : 1 # Fast tools PBIntraFast : 1 -ISPFast : 1 +ISPFast : 0 FastMrg : 1 AMaxBT : 1 +FastMIP : 0 +FastLocalDualTreeMode : 2 # Encoder optimization tools AffineAmvrEncOpt : 0 - +MmvdDisNum : 6 ### DO NOT ADD ANYTHING BELOW THIS LINE ### ### DO NOT DELETE THE EMPTY LINE BELOW ### + diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 389209a4cec35122dd6b916c5d4e515afc526c32..47f57f957765ea9bc213667a1bd188cad7134c0d 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -3,7 +3,7 @@ BitstreamFile : str.bin ReconFile : rec.yuv #======== Profile ================ -Profile : next +Profile : auto #======== Unit definition ================ MaxCUWidth : 64 # Maximum coding unit width in pixel @@ -17,23 +17,23 @@ GOPSize : 16 # GOP Size (number of B slice = GOPS IntraQPOffset : -3 LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled -# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures predict deltaRPS #ref_idcs reference idcs -Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 -16 -24 -32 0 -Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 3 -8 -16 8 1 8 4 1 1 0 1 -Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 4 -4 -12 4 12 1 4 4 1 1 1 1 -Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 5 -2 -10 2 6 14 1 2 5 1 1 1 1 1 -Frame5: B 1 6 -7.1444 0.3 0 0 1.0 0 0 4 
2 5 -1 1 3 7 15 1 1 6 1 0 1 1 1 1 -Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -3 1 5 13 1 -2 6 1 1 1 1 1 0 -Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 4 -2 -6 2 10 1 -3 6 0 1 1 1 1 0 -Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -5 1 3 11 1 1 5 1 1 1 1 1 -Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -3 -7 1 9 1 -2 6 1 1 1 1 1 0 -Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 3 -4 -12 4 1 -5 6 0 0 1 1 1 0 -Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 4 -2 -10 2 6 1 2 4 1 1 1 1 -Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -9 1 3 7 1 1 5 1 1 1 1 1 -Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -3 -11 1 5 1 -2 6 1 1 1 1 1 0 -Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 4 -2 -6 -14 2 1 -3 6 0 1 1 1 1 0 -Frame15: B 13 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -5 -13 1 3 1 1 5 1 1 1 1 1 -Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 5 -1 -3 -7 -15 1 1 -2 6 1 1 1 1 1 0 +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 16 32 24 2 2 16 32 +Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 2 8 16 2 2 -8 8 +Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 -12 +Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 3 -2 -6 -14 +Frame5: B 1 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 -1 2 4 -1 -3 -7 -15 +Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 3 2 3 -1 -5 -13 +Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 6 2 2 -2 -10 +Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 5 2 3 -1 -3 -11 +Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 7 2 2 -1 -9 +Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 4 +Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 2 -2 -6 +Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 9 2 3 -1 -3 -7 +Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 11 2 2 -1 -5 +Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 3 2 6 14 2 2 
-2 2 +Frame15: B 13 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 5 13 2 2 -1 -3 +Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 4 1 3 7 15 2 2 -1 1 #=========== Motion Search ============= FastSearch : 1 # 0:Full search 1:TZ search @@ -70,29 +70,14 @@ TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1 TransformSkipLog2MaxSize : 5 SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) -#============ Slices ================ -SliceMode : 0 # 0: Disable all slice options. - # 1: Enforce maximum number of LCU in an slice, - # 2: Enforce maximum number of bytes in an 'slice' - # 3: Enforce maximum number of tiles in a slice -SliceArgument : 1500 # Argument for 'SliceMode'. - # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice. - # If SliceMode==2 it represents max. bytes per slice. - # If SliceMode==3 it represents max. tiles per slice. - -LFCrossSliceBoundaryFlag : 1 # In-loop filtering, including ALF and DB, is across or not across slice boundary. - # 0:not across, 1: across - -#============ PCM ================ -PCMEnabledFlag : 0 # 0: No PCM mode -PCMLog2MaxSize : 5 # Log2 of maximum PCM block size. -PCMLog2MinSize : 3 # Log2 of minimum PCM block size. -PCMInputBitDepthFlag : 1 # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. -PCMFilterDisableFlag : 0 # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples. - -#============ Lossless ================ -TransquantBypassEnable : 0 # Value of PPS flag. 
-CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled +#=========== TemporalFilter ================= +TemporalFilter : 0 # Enable/disable GOP Based Temporal Filter +TemporalFilterFutureReference : 1 # Enable/disable reading future frames +TemporalFilterStrengthFrame8 : 0.95 # Enable filter at every 8th frame with given strength +TemporalFilterStrengthFrame16 : 1.5 # Enable filter at every 16th frame with given strength, longer intervals have higher priority + +#============ Tiles / Slices ================ +EnablePicPartitioning : 0 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) #============ Rate Control ====================== RateControl : 0 # Rate control: enable rate control @@ -106,9 +91,12 @@ RCForceIntraQP : 0 # Rate control: force int #============ VTM settings ====================== LoopFilterTcOffset_div2 : 0 SEIDecodedPictureHash : 0 -CbQpOffset : 1 -CrQpOffset : 1 - +CbQpOffset : 0 +CrQpOffset : 0 +SameCQPTablesForAllChroma : 1 +QpInValCb : 32 44 +QpOutValCb : 32 41 +ReWriteParamSets : 1 #============ NEXT ==================== # General @@ -119,14 +107,17 @@ DualITree : 1 # separate partitioning of luma and chroma MinQTLumaISlice : 8 MinQTChromaISlice : 4 MinQTNonISlice : 8 -MaxBTDepth : 3 -MaxBTDepthISliceL : 3 -MaxBTDepthISliceC : 3 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 MTS : 1 -MTSIntraMaxCand : 3 +MTSIntraMaxCand : 4 MTSInterMaxCand : 4 SBT : 1 +LFNST : 1 +ISP : 1 +MMVD : 1 Affine : 1 SubPuMvp : 1 MaxNumMergeCand : 6 @@ -134,27 +125,41 @@ LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 1 ALF : 1 -GBi : 1 -GBiFast : 1 -BIO : 1 -MHIntra : 1 +BCW : 1 +BcwFast : 1 +BIO : 1 +CIIP : 1 Triangle : 1 IBC : 0 # turned off in CTC AllowDisFracMMVD : 1 AffineAmvr : 1 -LumaReshapeEnable : 1 # luma reshaping. 
0: disable 1:enable +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 0 # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP +LMCSOffset : 6 # chroma residual scaling offset +MRL : 1 +MIP : 1 DMVR : 1 +SMVD : 1 +JointCbCr : 1 # joint coding of chroma residuals (if available): 0: disable, 1: enable +PROF : 1 +PPSorSliceMode : 1 # Fast tools PBIntraFast : 1 -ISPFast : 1 +ISPFast : 0 FastMrg : 1 AMaxBT : 1 +FastMIP : 0 +FastLFNST : 0 +FastLocalDualTreeMode : 1 +ChromaTS : 1 # Encoder optimization tools AffineAmvrEncOpt : 1 - +MmvdDisNum : 6 ### DO NOT ADD ANYTHING BELOW THIS LINE ### ### DO NOT DELETE THE EMPTY LINE BELOW ### + diff --git a/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg b/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..596ee9fd0a681a032abc9932ce509ccda1eeca49 --- /dev/null +++ b/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg @@ -0,0 +1,26 @@ +FieldCoding : 1 # (0: Frame based coding, 1: Field based coding) +TopFieldFirst : 1 # Field parity order (1: Top field first, 0: Bottom field first) +ConformanceMode : 1 +VuiParametersPresent : 1 +SEIPictureTiming : 1 +SEIFrameFieldInfo : 1 +IntraPeriod : -1 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 0 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slice = GOPSize-1) +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 2 5 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 9 10 17 18 25 26 8 8 1 2 9 10 17 18 25 26 +Frame2: B 3 5 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 10 11 18 19 26 27 8 9 1 2 3 10 11 18 19 26 27 +Frame3: B 4 4 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 3 4 11 12 19 20 8 8 1 2 3 4 11 12 19 20 +Frame4: B 5 4 -6.5 0.2590 0 
0 1.0 0 0 0 8 9 1 2 3 4 5 12 13 20 21 8 9 1 2 3 4 5 12 13 20 21 +Frame5: B 6 5 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 5 6 13 14 21 22 8 8 1 2 5 6 13 14 21 22 +Frame6: B 7 5 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 6 7 14 15 22 23 8 9 1 2 3 6 7 14 15 22 23 +Frame7: B 8 4 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 7 8 15 16 23 24 8 8 1 2 7 8 15 16 23 24 +Frame8: B 9 4 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 8 9 16 17 24 25 8 9 1 2 3 8 9 16 17 24 25 +Frame9: B 10 5 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 9 10 17 18 25 26 8 8 1 2 9 10 17 18 25 26 +Frame10: B 11 5 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 10 11 18 19 26 27 8 9 1 2 3 10 11 18 19 26 27 +Frame11: B 12 4 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 3 4 11 12 19 20 8 8 1 2 3 4 11 12 19 20 +Frame12: B 13 4 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 4 5 12 13 20 21 8 9 1 2 3 4 5 12 13 20 21 +Frame13: B 14 5 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 5 6 13 14 21 22 8 8 1 2 5 6 13 14 21 22 +Frame14: B 15 5 -6.5 0.2590 0 0 1.0 0 0 0 8 9 1 2 3 6 7 14 15 22 23 8 9 1 2 3 6 7 14 15 22 23 +Frame15: B 16 1 0.0 0.0 0 0 1.0 0 0 0 8 8 1 2 7 8 15 16 23 24 8 8 1 2 7 8 15 16 23 24 +Frame16: B 17 1 0.0 0.0 0 0 1.0 0 0 0 8 9 1 2 3 8 9 16 17 24 25 8 9 1 2 3 8 9 16 17 24 25 diff --git a/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg b/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg new file mode 100644 index 0000000000000000000000000000000000000000..dc23e0a35af85ff1022c69bdd50f90743050e486 --- /dev/null +++ b/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg @@ -0,0 +1,18 @@ +FieldCoding : 1 # (0: Frame based coding, 1: Field based coding) +TopFieldFirst : 1 # Field parity order (1: Top field first, 0: Bottom field first) +ConformanceMode : 1 +VuiParametersPresent : 1 +SEIPictureTiming : 1 +SEIFrameFieldInfo : 1 +IntraPeriod : -1 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 0 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 8 # GOP Size (number of B slice = GOPSize-1) +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor 
tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 2 5 -6.5 0.2590 0 0 1.0 0 0 0 5 5 1 2 9 10 17 5 5 1 2 9 10 17 +Frame2: B 3 5 -6.5 0.2590 0 0 1.0 0 0 0 6 6 1 2 3 10 11 18 6 6 1 2 3 10 11 18 +Frame3: B 4 4 -6.5 0.2590 0 0 1.0 0 0 0 7 7 1 2 3 4 11 12 19 7 7 1 2 3 4 11 12 19 +Frame4: B 5 4 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 3 4 5 12 13 20 8 8 1 2 3 4 5 12 13 20 +Frame5: B 6 5 -6.5 0.2590 0 0 1.0 0 0 0 7 7 1 2 5 6 13 14 21 7 7 1 2 5 6 13 14 21 +Frame6: B 7 5 -6.5 0.2590 0 0 1.0 0 0 0 8 8 1 2 3 6 7 14 15 22 8 8 1 2 3 6 7 14 15 22 +Frame7: B 8 1 0.0 0.0 0 0 1.0 0 0 0 7 7 1 2 7 8 15 16 23 7 7 1 2 7 8 15 16 23 +Frame8: B 9 1 0.0 0.0 0 0 1.0 0 0 0 8 8 1 2 3 8 9 16 17 24 8 8 1 2 3 8 9 16 17 24 diff --git a/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg b/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg new file mode 100644 index 0000000000000000000000000000000000000000..040922b18643bb2a18a3c6cf964f68980f923363 --- /dev/null +++ b/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg @@ -0,0 +1,26 @@ +FieldCoding : 1 # (0: Frame based coding, 1: Field based coding) +TopFieldFirst : 1 # Field parity order (1: Top field first, 0: Bottom field first) +ConformanceMode : 1 +VuiParametersPresent : 1 +SEIPictureTiming : 1 +SEIFrameFieldInfo : 1 +IntraPeriod : 32 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slice = GOPSize-1) +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 17 1 0.0 0.0 0 0 0.442 0 0 0 4 4 16 17 20 21 4 4 16 17 20 21 +Frame2: B 16 1 0.0 0.0 0 0 0.442 0 0 0 3 3 15 16 -1 3 3 -1 15 16 +Frame3: B 8 2 0.0 0.0 0 0 0.3536 0 0 1 2 4 7 8 -8 -9 2 4 -8 -9 
7 8 +Frame4: B 9 2 0.0 0.0 0 0 0.3536 0 0 1 2 5 1 8 9 -7 -8 2 5 -7 -8 1 8 9 +Frame5: B 4 3 0.0 0.0 0 0 0.3536 0 0 2 2 4 3 4 -4 -5 2 4 -4 -5 -12 -13 +Frame6: B 5 3 0.0 0.0 0 0 0.3536 0 0 2 2 5 1 4 5 -3 -4 2 4 -3 -4 -11 -12 +Frame7: B 2 4 0.0 0.0 0 0 0.68 0 0 3 2 4 1 2 -2 -3 2 6 -2 -3 -6 -7 -14 -15 +Frame8: B 3 4 0.0 0.0 0 0 0.68 0 0 3 2 4 1 2 -1 -2 2 6 -1 -2 -5 -6 -13 -14 +Frame9: B 6 4 0.0 0.0 0 0 0.68 0 0 3 2 4 1 2 -2 -3 2 4 -2 -3 -10 -11 +Frame10: B 7 4 0.0 0.0 0 0 0.68 0 0 3 2 5 1 2 3 -1 -2 2 4 -1 -2 -9 -10 +Frame11: B 12 3 0.0 0.0 0 0 0.3536 0 0 2 2 4 3 4 -4 -5 2 4 -4 -5 3 4 +Frame12: B 13 3 0.0 0.0 0 0 0.3536 0 0 2 2 5 1 4 5 -3 -4 2 5 -3 -4 1 4 5 +Frame13: B 10 4 0.0 0.0 0 0 0.68 0 0 3 2 4 1 2 -2 -3 2 4 -2 -3 -6 -7 +Frame14: B 11 4 0.0 0.0 0 0 0.68 0 0 3 2 5 1 2 3 -1 -2 2 4 -1 -2 -5 -6 +Frame15: B 14 4 0.0 0.0 0 0 0.68 0 0 3 2 5 1 2 5 -2 -3 2 4 -2 -3 1 2 +Frame16: B 15 4 0.0 0.0 0 0 0.68 0 0 3 2 4 1 2 3 6 2 4 -1 -2 1 2 diff --git a/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg b/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg new file mode 100644 index 0000000000000000000000000000000000000000..34435952483b8187452b7b4602392b34516d24dc --- /dev/null +++ b/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg @@ -0,0 +1,42 @@ +FieldCoding : 1 # (0: Frame based coding, 1: Field based coding) +TopFieldFirst : 1 # Field parity order (1: Top field first, 0: Bottom field first) +ConformanceMode : 1 +VuiParametersPresent : 1 +SEIPictureTiming : 1 +SEIFrameFieldInfo : 1 +IntraPeriod : 64 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 32 # GOP Size (number of B slice = GOPSize-1) +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 33 1 0.0 0.0 0 0 1.0 0 0 0 4 4 32 33 48 49 4 4 32 33 48 
49 +Frame2: B 32 1 0.0 0.0 0 0 1.0 0 0 0 4 5 31 32 47 48 -1 4 5 -1 31 32 47 48 +Frame3: B 16 1 -4.8848 0.2061 0 0 1.0 0 0 1 4 4 15 16 31 32 4 4 -16 -17 15 16 +Frame4: B 17 1 -4.8848 0.2061 0 0 1.0 0 0 1 4 5 1 16 17 32 33 4 4 -15 -16 16 17 +Frame5: B 8 4 -5.7476 0.2286 0 0 1.0 0 0 2 4 4 7 8 23 24 4 4 -8 -9 -24 -25 +Frame6: B 9 4 -5.7476 0.2286 0 0 1.0 0 0 2 4 5 1 8 9 24 25 4 4 -7 -8 -23 -24 +Frame7: B 4 5 -5.90 0.2333 0 0 1.0 0 0 3 4 4 3 4 19 20 4 6 -4 -5 -12 -13 -28 -29 +Frame8: B 5 5 -5.90 0.2333 0 0 1.0 0 0 3 4 5 1 4 5 20 21 4 6 -3 -4 -11 -12 -27 -28 +Frame9: B 2 6 -7.1444 0.3 0 0 1.0 0 0 4 4 4 1 2 -2 -3 4 8 -2 -3 -6 -7 -14 -15 -30 -31 +Frame10: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 4 5 1 2 3 -1 -2 4 8 -1 -2 -5 -6 -13 -14 -29 -30 +Frame11: B 6 6 -7.1444 0.3 0 0 1.0 0 0 4 4 4 1 2 5 6 4 6 -2 -3 -10 -11 -26 -27 +Frame12: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 4 5 1 2 3 6 7 4 6 -1 -2 -9 -10 -25 -26 +Frame13: B 12 5 -5.90 0.2333 0 0 1.0 0 0 3 4 4 3 4 11 12 4 4 -4 -5 -20 -21 +Frame14: B 13 5 -5.90 0.2333 0 0 1.0 0 0 3 4 5 1 4 5 12 13 4 4 -3 -4 -19 -20 +Frame15: B 10 6 -7.1444 0.3 0 0 1.0 0 0 4 4 4 1 2 9 10 4 6 -2 -3 -6 -7 -22 -23 +Frame16: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 4 5 1 2 3 10 11 4 6 -1 -2 -5 -6 -21 -22 +Frame17: B 14 6 -7.1444 0.3 0 0 1.0 0 0 4 4 6 1 2 5 6 13 14 4 4 -2 -3 -18 -19 +Frame18: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 4 7 1 2 3 6 7 14 15 4 4 -1 -2 -17 -18 +Frame19: B 24 4 -5.7476 0.2286 0 0 1.0 0 0 2 4 4 7 8 23 24 4 4 -8 -9 7 8 +Frame20: B 25 4 -5.7476 0.2286 0 0 1.0 0 0 2 4 5 1 8 9 24 25 4 4 -7 -8 8 9 +Frame21: B 20 5 -5.90 0.2333 0 0 1.0 0 0 3 4 4 3 4 19 20 4 4 -4 -5 -12 -13 +Frame22: B 21 5 -5.90 0.2333 0 0 1.0 0 0 3 4 5 1 4 5 20 21 4 4 -3 -4 -11 -12 +Frame23: B 18 6 -7.1444 0.3 0 0 1.0 0 0 4 4 4 1 2 17 18 4 6 -2 -3 -6 -7 -14 -15 +Frame24: B 19 6 -7.1444 0.3 0 0 1.0 0 0 4 4 5 1 2 3 18 19 4 6 -1 -2 -5 -6 -13 -14 +Frame25: B 22 6 -7.1444 0.3 0 0 1.0 0 0 4 4 6 1 2 5 6 21 22 4 4 -2 -3 -10 -11 +Frame26: B 23 6 -7.1444 0.3 0 0 1.0 0 0 4 4 7 1 2 3 6 7 22 23 4 4 -1 -2 -9 
-10 +Frame27: B 28 5 -5.90 0.2333 0 0 1.0 0 0 3 4 6 3 4 11 12 27 28 4 4 -4 -5 3 4 +Frame28: B 29 5 -5.90 0.2333 0 0 1.0 0 0 3 4 7 1 4 5 12 13 28 29 4 4 -3 -4 4 5 +Frame29: B 26 6 -7.1444 0.3 0 0 1.0 0 0 4 4 6 1 2 9 10 25 26 4 4 -2 -3 -6 -7 +Frame30: B 27 6 -7.1444 0.3 0 0 1.0 0 0 4 4 7 1 2 3 10 11 26 27 4 4 -1 -2 -5 -6 +Frame31: B 30 6 -7.1444 0.3 0 0 1.0 0 0 4 4 8 1 2 5 6 13 14 29 30 4 4 -2 -3 1 2 +Frame32: B 31 6 -7.1444 0.3 0 0 1.0 0 0 4 4 9 1 2 3 6 7 14 15 30 31 4 4 -1 -2 2 3 diff --git a/cfg/layers.cfg b/cfg/layers.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f083583f89767ae10f384c6dab19ebcc4dc2e28a --- /dev/null +++ b/cfg/layers.cfg @@ -0,0 +1,19 @@ +#======== Layers =============== +MaxLayers : 2 +MaxSublayers : 1 +AllLayersSameNumSublayersFlag : 0 +AllIndependentLayersFlag : 0 +#======== OLSs =============== +EachLayerIsAnOlsFlag : 0 +OlsModeIdc : 2 +NumOutputLayerSets : 2 +OlsOutputLayer1 : 1 0 +#======== Layer-0 =============== +LayerId0 : 0 +#======== Layer-1 =============== +LayerId1 : 1 +NumRefLayers1 : 1 +RefLayerIdx1 : 0 + + + diff --git a/cfg/lossless/lossless.cfg b/cfg/lossless/lossless.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a0a00a933635eb13a64910aead5ba7c280832343 --- /dev/null +++ b/cfg/lossless/lossless.cfg @@ -0,0 +1,19 @@ +CostMode : lossless +ChromaTS : 1 +DepQuant : 0 +RDOQ : 0 +RDOQTS : 0 +SBT : 0 +LMCSEnable : 0 +ISP : 0 +MTS : 0 +LFNST : 0 +JointCbCr : 0 +LoopFilterDisable : 1 +SAO : 0 +ALF : 0 +DMVR : 0 +BIO : 0 +PROF : 0 +Log2MaxTbSize : 5 +InternalBitDepth : 0 \ No newline at end of file diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg new file mode 100644 index 0000000000000000000000000000000000000000..7487dc814d1464b5616ade8693b73286b6daee37 --- /dev/null +++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg @@ -0,0 +1,159 @@ 
+#======== File I/O ===================== +BitstreamFile : str.bin +ReconFile : rec.yuv + +#======== Profile ================ +Profile : auto + +#======== Unit definition ================ +MaxCUWidth : 64 # Maximum coding unit width in pixels +MaxCUHeight : 64 # Maximum coding unit height in pixels +MaxPartitionDepth : 4 # Maximum coding unit depth + +#======== Coding Structure ============= +IntraPeriod : 32 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slices = GOPSize-1) + +IntraQPOffset : -3 +LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 16 32 24 2 2 16 32 +Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 2 8 16 2 2 -8 8 +Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 -12 +Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 3 -2 -6 -14 +Frame5: B 1 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 -1 2 4 -1 -3 -7 -15 +Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 3 2 3 -1 -5 -13 +Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 6 2 2 -2 -10 +Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 5 2 3 -1 -3 -11 +Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 7 2 2 -1 -9 +Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 4 +Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 2 -2 -6 +Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 9 2 3 -1 -3 -7 +Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 11 2 2 -1 -5 +Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 3 2 6 14 2 2 -2 2 +Frame15: B 13 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 5 13 2 2 -1 -3 +Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 4 1 3 7 
15 2 2 -1 1 + +#=========== Motion Search ============= +FastSearch : 1 # 0: Full search, 1: TZ search +SearchRange : 384 # (0: Search range is a Full frame) +ASR : 1 # Adaptive motion search range +MinSearchWindow : 96 # Minimum motion search window size for the adaptive window ME +BipredSearchRange : 4 # Search range for bi-prediction refinement +HadamardME : 1 # Use of Hadamard measure for fractional ME +FEN : 1 # Fast encoder decision +FDM : 1 # Fast Decision for Merge RD cost + +#======== Quantization ============= +QP : 32 # Quantization parameter (0-51) +MaxDeltaQP : 0 # CU-based multi-QP optimization +MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment +DeltaQpRD : 0 # Slice-based multi-QP optimization +RDOQ : 1 # RDOQ +RDOQTS : 1 # RDOQ for transform skip + +#=========== Deblock Filter ============ +LoopFilterOffsetInPPS : 1 # Deblocking params: 0 = varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) = constant params in PPS, param = base_param +LoopFilterDisable : 0 # Disable deblocking filter (0=Filter, 1=No Filter) +LoopFilterBetaOffset_div2 : 0 # base_param: -6 ~ 6 +LoopFilterTcOffset_div2 : 0 # base_param: -6 ~ 6 +DeblockingFilterMetric : 0 # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0) + +#=========== Misc. 
============ +InternalBitDepth : 10 # codec operating bit-depth + +#=========== Coding Tools ================= +SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) +TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) +TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 +SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) + +#============ Tiles / Slices ================ +EnablePicPartitioning : 1 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) + +# Figure 4 - Section 6.3.1 - 12 tiles and 3 raster-scan slices +TileColumnWidthArray : 6 6 6 # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width +TileRowHeightArray : 3 3 3 3 # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height +RasterScanSlices : 1 # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan) +RasterSliceSizes : 2 5 5 # Raster-scan slice sizes in units of tiles. 
Last slice size will be repeated uniformly to cover any remaining tiles in the picture +DisableLoopFilterAcrossTiles : 0 # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries) +DisableLoopFilterAcrossSlices : 0 # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries) + +#============ Rate Control ====================== +RateControl : 0 # Rate control: enable rate control +TargetBitrate : 1000000 # Rate control: target bitrate, in bps +KeepHierarchicalBit : 2 # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation +LCULevelRateControl : 1 # Rate control: 1: LCU level RC; 0: picture level RC +RCLCUSeparateModel : 1 # Rate control: use LCU level separate R-lambda model +InitialQP : 0 # Rate control: initial QP +RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP + +#============ VTM settings ====================== +LoopFilterTcOffset_div2 : 0 +SEIDecodedPictureHash : 0 +CbQpOffset : 1 +CrQpOffset : 1 + +ReWriteParamSets : 1 +#============ NEXT ==================== + +# General +CTUSize : 128 +LCTUFast : 1 + +DualITree : 1 # separate partitioning of luma and chroma channels for I-slices +MinQTLumaISlice : 8 +MinQTChromaISlice : 4 +MinQTNonISlice : 8 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 + +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 +SBT : 1 +LFNST : 1 +ISP : 1 +MMVD : 1 +Affine : 1 +SubPuMvp : 1 +MaxNumMergeCand : 6 +LMChroma : 1 # use CCLM only +DepQuant : 1 +IMV : 1 +ALF : 1 +BCW : 1 +BcwFast : 1 +BIO : 1 +CIIP : 1 +Triangle : 1 +IBC : 0 # turned off in CTC +AllowDisFracMMVD : 1 +AffineAmvr : 1 +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 0 # LMCS model update control: 
0:RA, 1:AI, 2:LDB/LDP +MRL : 1 +MIP : 1 +DMVR : 1 +SMVD : 1 + +# Fast tools +PBIntraFast : 1 +ISPFast : 1 +FastMrg : 1 +AMaxBT : 1 +FastMIP : 0 +FastLFNST : 0 + +# Encoder optimization tools +AffineAmvrEncOpt : 1 +MmvdDisNum : 6 +### DO NOT ADD ANYTHING BELOW THIS LINE ### +### DO NOT DELETE THE EMPTY LINE BELOW ### + + + diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c0d3ecbbc396d50a6895e9a1b9e17e672e943071 --- /dev/null +++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg @@ -0,0 +1,159 @@ +#======== File I/O ===================== +BitstreamFile : str.bin +ReconFile : rec.yuv + +#======== Profile ================ +Profile : auto + +#======== Unit definition ================ +MaxCUWidth : 64 # Maximum coding unit width in pixel +MaxCUHeight : 64 # Maximum coding unit height in pixel +MaxPartitionDepth : 4 # Maximum coding unit depth + +#======== Coding Structure ============= +IntraPeriod : 32 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slice = GOPSize-1) + +IntraQPOffset : -3 +LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 16 32 24 2 2 16 32 +Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 2 8 16 2 2 -8 8 +Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 -12 +Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 3 -2 -6 -14 +Frame5: B 1 6 -7.1444 0.3 0 0 
1.0 0 0 4 2 2 1 -1 2 4 -1 -3 -7 -15 +Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 3 2 3 -1 -5 -13 +Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 6 2 2 -2 -10 +Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 5 2 3 -1 -3 -11 +Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 7 2 2 -1 -9 +Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 4 +Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 2 -2 -6 +Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 9 2 3 -1 -3 -7 +Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 11 2 2 -1 -5 +Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 3 2 6 14 2 2 -2 2 +Frame15: B 13 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 5 13 2 2 -1 -3 +Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 4 1 3 7 15 2 2 -1 1 + +#=========== Motion Search ============= +FastSearch : 1 # 0:Full search 1:TZ search +SearchRange : 384 # (0: Search range is a Full frame) +ASR : 1 # Adaptive motion search range +MinSearchWindow : 96 # Minimum motion search window size for the adaptive window ME +BipredSearchRange : 4 # Search range for bi-prediction refinement +HadamardME : 1 # Use of hadamard measure for fractional ME +FEN : 1 # Fast encoder decision +FDM : 1 # Fast Decision for Merge RD cost + +#======== Quantization ============= +QP : 32 # Quantization parameter(0-51) +MaxDeltaQP : 0 # CU-based multi-QP optimization +MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment +DeltaQpRD : 0 # Slice-based multi-QP optimization +RDOQ : 1 # RDOQ +RDOQTS : 1 # RDOQ for transform skip + +#=========== Deblock Filter ============ +LoopFilterOffsetInPPS : 1 # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param) +LoopFilterDisable : 0 # Disable deblocking filter (0=Filter, 1=No Filter) +LoopFilterBetaOffset_div2 : 0 # base_param: -6 ~ 6 +LoopFilterTcOffset_div2 : 0 # base_param: -6 ~ 6 +DeblockingFilterMetric : 0 # blockiness metric (automatically configures deblocking parameters in 
bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0) + +#=========== Misc. ============ +InternalBitDepth : 10 # codec operating bit-depth + +#=========== Coding Tools ================= +SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) +TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) +TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 +SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) + +#============ Tiles / Slices ================ +EnablePicPartitioning : 1 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) + +# Figure 6 - Section 6.3.1 - 4 tiles and 4 rectangular slices +TileColumnWidthArray : 9 9 # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width +TileRowHeightArray : 6 6 # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height +RasterScanSlices : 0 # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan) +RectSlicePositions : 0 206 9 35 45 107 117 215 # Rectangular slice positions. 
List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address +DisableLoopFilterAcrossTiles : 0 # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries) +DisableLoopFilterAcrossSlices : 0 # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries) + +#============ Rate Control ====================== +RateControl : 0 # Rate control: enable rate control +TargetBitrate : 1000000 # Rate control: target bitrate, in bps +KeepHierarchicalBit : 2 # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation +LCULevelRateControl : 1 # Rate control: 1: LCU level RC; 0: picture level RC +RCLCUSeparateModel : 1 # Rate control: use LCU level separate R-lambda model +InitialQP : 0 # Rate control: initial QP +RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP + +#============ VTM settings ====================== +LoopFilterTcOffset_div2 : 0 +SEIDecodedPictureHash : 0 +CbQpOffset : 1 +CrQpOffset : 1 + +ReWriteParamSets : 1 +#============ NEXT ==================== + +# General +CTUSize : 128 +LCTUFast : 1 + +DualITree : 1 # separate partitioning of luma and chroma channels for I-slices +MinQTLumaISlice : 8 +MinQTChromaISlice : 4 +MinQTNonISlice : 8 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 + +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 +SBT : 1 +LFNST : 1 +ISP : 1 +MMVD : 1 +Affine : 1 +SubPuMvp : 1 +MaxNumMergeCand : 6 +LMChroma : 1 # use CCLM only +DepQuant : 1 +IMV : 1 +ALF : 1 +BCW : 1 +BcwFast : 1 +BIO : 1 +CIIP : 1 +Triangle : 1 +IBC : 0 # turned off in CTC +AllowDisFracMMVD : 1 +AffineAmvr : 1 +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 0 # LMCS model update control: 
0:RA, 1:AI, 2:LDB/LDP +MRL : 1 +MIP : 1 +DMVR : 1 +SMVD : 1 + +# Fast tools +PBIntraFast : 1 +ISPFast : 1 +FastMrg : 1 +AMaxBT : 1 +FastMIP : 0 +FastLFNST : 0 + +# Encoder optimization tools +AffineAmvrEncOpt : 1 +MmvdDisNum : 6 +### DO NOT ADD ANYTHING BELOW THIS LINE ### +### DO NOT DELETE THE EMPTY LINE BELOW ### + + + diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9f29eb4d055d13f850e6e6b47573ca8e5d51250b --- /dev/null +++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg @@ -0,0 +1,160 @@ +#======== File I/O ===================== +BitstreamFile : str.bin +ReconFile : rec.yuv + +#======== Profile ================ +Profile : auto + +#======== Unit definition ================ +MaxCUWidth : 64 # Maximum coding unit width in pixel +MaxCUHeight : 64 # Maximum coding unit height in pixel +MaxPartitionDepth : 4 # Maximum coding unit depth + +#======== Coding Structure ============= +IntraPeriod : 32 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slice = GOPSize-1) + +IntraQPOffset : -3 +LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 16 32 24 2 2 16 32 +Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 2 8 16 2 2 -8 8 +Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 -12 +Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 3 -2 -6 -14 
+Frame5: B 1 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 -1 2 4 -1 -3 -7 -15 +Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 3 2 3 -1 -5 -13 +Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 6 2 2 -2 -10 +Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 5 2 3 -1 -3 -11 +Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 7 2 2 -1 -9 +Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 4 +Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 2 -2 -6 +Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 9 2 3 -1 -3 -7 +Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 11 2 2 -1 -5 +Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 3 2 6 14 2 2 -2 2 +Frame15: B 13 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 5 13 2 2 -1 -3 +Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 4 1 3 7 15 2 2 -1 1 + +#=========== Motion Search ============= +FastSearch : 1 # 0:Full search 1:TZ search +SearchRange : 384 # (0: Search range is a Full frame) +ASR : 1 # Adaptive motion search range +MinSearchWindow : 96 # Minimum motion search window size for the adaptive window ME +BipredSearchRange : 4 # Search range for bi-prediction refinement +HadamardME : 1 # Use of hadamard measure for fractional ME +FEN : 1 # Fast encoder decision +FDM : 1 # Fast Decision for Merge RD cost + +#======== Quantization ============= +QP : 32 # Quantization parameter(0-51) +MaxDeltaQP : 0 # CU-based multi-QP optimization +MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment +DeltaQpRD : 0 # Slice-based multi-QP optimization +RDOQ : 1 # RDOQ +RDOQTS : 1 # RDOQ for transform skip + +#=========== Deblock Filter ============ +LoopFilterOffsetInPPS : 1 # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param) +LoopFilterDisable : 0 # Disable deblocking filter (0=Filter, 1=No Filter) +LoopFilterBetaOffset_div2 : 0 # base_param: -6 ~ 6 +LoopFilterTcOffset_div2 : 0 # base_param: -6 ~ 6 +DeblockingFilterMetric : 0 # blockiness metric (automatically configures 
deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0) + +#=========== Misc. ============ +InternalBitDepth : 10 # codec operating bit-depth + +#=========== Coding Tools ================= +SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) +TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) +TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 +SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) + +#============ Tiles / Slices ================ +EnablePicPartitioning : 1 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) + +# 24 tiles and 6 rectangular slices +TileColumnWidthArray : 3 3 3 3 3 3 # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width +TileRowHeightArray : 3 3 3 3 # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height +RasterScanSlices : 0 # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan) +RectSliceFixedWidth : 2 # Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead) +RectSliceFixedHeight : 2 # Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead) +DisableLoopFilterAcrossTiles : 0 # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries) +DisableLoopFilterAcrossSlices : 0 # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries) + +#============ Rate Control ====================== +RateControl : 0 # Rate control: enable rate control +TargetBitrate : 1000000 # Rate control: target bitrate, in bps +KeepHierarchicalBit : 2 # Rate 
control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation +LCULevelRateControl : 1 # Rate control: 1: LCU level RC; 0: picture level RC +RCLCUSeparateModel : 1 # Rate control: use LCU level separate R-lambda model +InitialQP : 0 # Rate control: initial QP +RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP + +#============ VTM settings ====================== +LoopFilterTcOffset_div2 : 0 +SEIDecodedPictureHash : 0 +CbQpOffset : 1 +CrQpOffset : 1 + +ReWriteParamSets : 1 +#============ NEXT ==================== + +# General +CTUSize : 128 +LCTUFast : 1 + +DualITree : 1 # separate partitioning of luma and chroma channels for I-slices +MinQTLumaISlice : 8 +MinQTChromaISlice : 4 +MinQTNonISlice : 8 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 + +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 +SBT : 1 +LFNST : 1 +ISP : 1 +MMVD : 1 +Affine : 1 +SubPuMvp : 1 +MaxNumMergeCand : 6 +LMChroma : 1 # use CCLM only +DepQuant : 1 +IMV : 1 +ALF : 1 +BCW : 1 +BcwFast : 1 +BIO : 1 +CIIP : 1 +Triangle : 1 +IBC : 0 # turned off in CTC +AllowDisFracMMVD : 1 +AffineAmvr : 1 +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 0 # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP +MRL : 1 +MIP : 1 +DMVR : 1 +SMVD : 1 + +# Fast tools +PBIntraFast : 1 +ISPFast : 1 +FastMrg : 1 +AMaxBT : 1 +FastMIP : 0 +FastLFNST : 0 + +# Encoder optimization tools +AffineAmvrEncOpt : 1 +MmvdDisNum : 6 +### DO NOT ADD ANYTHING BELOW THIS LINE ### +### DO NOT DELETE THE EMPTY LINE BELOW ### + + + diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4f9a7857f0daa71a9d60e94f5848555e39cd23c8 --- /dev/null +++ 
b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg @@ -0,0 +1,157 @@ +#======== File I/O ===================== +BitstreamFile : str.bin +ReconFile : rec.yuv + +#======== Profile ================ +Profile : auto + +#======== Unit definition ================ +MaxCUWidth : 64 # Maximum coding unit width in pixel +MaxCUHeight : 64 # Maximum coding unit height in pixel +MaxPartitionDepth : 4 # Maximum coding unit depth + +#======== Coding Structure ============= +IntraPeriod : 32 # Period of I-Frame ( -1 = only first) +DecodingRefreshType : 1 # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI +GOPSize : 16 # GOP Size (number of B slice = GOPSize-1) + +IntraQPOffset : -3 +LambdaFromQpEnable : 1 # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled +# Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0 reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1 reference_pictures_L1 +Frame1: B 16 1 0.0 0.0 0 0 1.0 0 0 0 2 3 16 32 24 2 2 16 32 +Frame2: B 8 1 -4.8848 0.2061 0 0 1.0 0 0 1 2 2 8 16 2 2 -8 8 +Frame3: B 4 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 -12 +Frame4: B 2 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 3 -2 -6 -14 +Frame5: B 1 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 -1 2 4 -1 -3 -7 -15 +Frame6: B 3 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 3 2 3 -1 -5 -13 +Frame7: B 6 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 6 2 2 -2 -10 +Frame8: B 5 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 5 2 3 -1 -3 -11 +Frame9: B 7 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 7 2 2 -1 -9 +Frame10: B 12 4 -5.7476 0.2286 0 0 1.0 0 0 2 2 2 4 12 2 2 -4 4 +Frame11: B 10 5 -5.90 0.2333 0 0 1.0 0 0 3 2 2 2 10 2 2 -2 -6 +Frame12: B 9 6 -7.1444 0.3 0 0 1.0 0 0 4 2 2 1 9 2 3 -1 -3 -7 +Frame13: B 11 6 -7.1444 0.3 0 0 1.0 0 0 4 2 3 1 3 11 2 2 -1 -5 +Frame14: B 14 5 -5.90 0.2333 0 0 1.0 0 0 3 2 3 2 6 14 2 2 -2 2 +Frame15: B 13 6 
-7.1444 0.3 0 0 1.0 0 0 4 2 3 1 5 13 2 2 -1 -3 +Frame16: B 15 6 -7.1444 0.3 0 0 1.0 0 0 4 2 4 1 3 7 15 2 2 -1 1 + +#=========== Motion Search ============= +FastSearch : 1 # 0:Full search 1:TZ search +SearchRange : 384 # (0: Search range is a Full frame) +ASR : 1 # Adaptive motion search range +MinSearchWindow : 96 # Minimum motion search window size for the adaptive window ME +BipredSearchRange : 4 # Search range for bi-prediction refinement +HadamardME : 1 # Use of hadamard measure for fractional ME +FEN : 1 # Fast encoder decision +FDM : 1 # Fast Decision for Merge RD cost + +#======== Quantization ============= +QP : 32 # Quantization parameter(0-51) +MaxDeltaQP : 0 # CU-based multi-QP optimization +MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment +DeltaQpRD : 0 # Slice-based multi-QP optimization +RDOQ : 1 # RDOQ +RDOQTS : 1 # RDOQ for transform skip + +#=========== Deblock Filter ============ +LoopFilterOffsetInPPS : 1 # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param) +LoopFilterDisable : 0 # Disable deblocking filter (0=Filter, 1=No Filter) +LoopFilterBetaOffset_div2 : 0 # base_param: -6 ~ 6 +LoopFilterTcOffset_div2 : 0 # base_param: -6 ~ 6 +DeblockingFilterMetric : 0 # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0) + +#=========== Misc. 
============ +InternalBitDepth : 10 # codec operating bit-depth + +#=========== Coding Tools ================= +SAO : 1 # Sample adaptive offset (0: OFF, 1: ON) +TransformSkip : 1 # Transform skipping (0: OFF, 1: ON) +TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON) +TransformSkipLog2MaxSize : 5 +SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON) + +#============ Tiles / Slices ================ +EnablePicPartitioning : 1 # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) +TileColumnWidthArray : 1 2 3 4 # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width +TileRowHeightArray : 1 2 3 4 # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height +RasterScanSlices : 1 # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan) +RasterSliceSizes : 1 # Raster-scan slice sizes in units of tiles. 
Last slice size will be repeated uniformly to cover any remaining tiles in the picture +DisableLoopFilterAcrossTiles : 0 # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries) +DisableLoopFilterAcrossSlices : 0 # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries) + +#============ Rate Control ====================== +RateControl : 0 # Rate control: enable rate control +TargetBitrate : 1000000 # Rate control: target bitrate, in bps +KeepHierarchicalBit : 2 # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation +LCULevelRateControl : 1 # Rate control: 1: LCU level RC; 0: picture level RC +RCLCUSeparateModel : 1 # Rate control: use LCU level separate R-lambda model +InitialQP : 0 # Rate control: initial QP +RCForceIntraQP : 0 # Rate control: force intra QP to be equal to initial QP + +#============ VTM settings ====================== +LoopFilterTcOffset_div2 : 0 +SEIDecodedPictureHash : 0 +CbQpOffset : 1 +CrQpOffset : 1 + +ReWriteParamSets : 1 +#============ NEXT ==================== + +# General +CTUSize : 128 +LCTUFast : 1 + +DualITree : 1 # separate partitioning of luma and chroma channels for I-slices +MinQTLumaISlice : 8 +MinQTChromaISlice : 4 +MinQTNonISlice : 8 +MaxMTTHierarchyDepth : 3 +MaxMTTHierarchyDepthISliceL : 3 +MaxMTTHierarchyDepthISliceC : 3 + +MTS : 1 +MTSIntraMaxCand : 3 +MTSInterMaxCand : 4 +SBT : 1 +LFNST : 1 +ISP : 1 +MMVD : 1 +Affine : 1 +SubPuMvp : 1 +MaxNumMergeCand : 6 +LMChroma : 1 # use CCLM only +DepQuant : 1 +IMV : 1 +ALF : 1 +BCW : 1 +BcwFast : 1 +BIO : 1 +CIIP : 1 +Triangle : 1 +IBC : 0 # turned off in CTC +AllowDisFracMMVD : 1 +AffineAmvr : 1 +LMCSEnable : 1 # LMCS: 0: disable, 1:enable +LMCSSignalType : 0 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSUpdateCtrl : 0 # LMCS model update control: 
0:RA, 1:AI, 2:LDB/LDP +MRL : 1 +MIP : 1 +DMVR : 1 +SMVD : 1 + +# Fast tools +PBIntraFast : 1 +ISPFast : 1 +FastMrg : 1 +AMaxBT : 1 +FastMIP : 0 +FastLFNST : 0 + +# Encoder optimization tools +AffineAmvrEncOpt : 1 +MmvdDisNum : 6 +### DO NOT ADD ANYTHING BELOW THIS LINE ### +### DO NOT DELETE THE EMPTY LINE BELOW ### + + + diff --git a/cfg/per-class/classF.cfg b/cfg/per-class/classF.cfg index 52ae3dfa2c608bae4325b0c19695db04bd0ce589..0edc6f8c15bcee5cdd03a22df2d25406613a7c33 100644 --- a/cfg/per-class/classF.cfg +++ b/cfg/per-class/classF.cfg @@ -1,3 +1,3 @@ IBC : 1 HashME : 1 - +BDPCM: 1 diff --git a/cfg/per-class/classH1.cfg b/cfg/per-class/classH1.cfg new file mode 100644 index 0000000000000000000000000000000000000000..a6b09ffa1f9acffa0b742fe305e391b30c4a5283 --- /dev/null +++ b/cfg/per-class/classH1.cfg @@ -0,0 +1,27 @@ +# ======== Luma adaptive QP ========== +LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma +isSDR : 0 # 1: SDR in PQ container, 0: HDR + +# ======= LMCS ======================= +LMCSEnable : 1 # turned on in HDR CTC +LMCSSignalType : 1 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSOffset : 1 # chroma residual scaling offset + +#======== Chroma QP scale ============ +WCGPPSEnable : 0 # enable WCG Chroma scale + +CbQpOffset : 0 +CrQpOffset : 0 + +SameCQPTablesForAllChroma : 0 +QpInValCb : 13 20 36 38 43 54 +QpOutValCb : 13 21 29 29 32 37 +QpInValCr : 13 20 37 41 44 54 +QpOutValCr : 13 21 27 29 32 37 +QpInValCbCr : 12 21 41 43 54 +QpOutValCbCr : 12 22 30 32 37 + +VerCollocatedChroma : 1 + +#======== HDR Metrics ============ +CalculateHdrMetrics : 1 # Calculate HDR metrics for Class H1 (PQ) content diff --git a/cfg/per-class/classH2.cfg b/cfg/per-class/classH2.cfg new file mode 100644 index 0000000000000000000000000000000000000000..1792d27b2a577d359a67badd4e571c5cd507a3be --- /dev/null +++ b/cfg/per-class/classH2.cfg @@ -0,0 +1,16 @@ +# ======== Luma adaptive QP ======== +LumaLevelToDeltaQPMode : 0 # Change luma delta QP 
based on average luma + +# ======= LMCS ======================= +LMCSEnable : 1 # turned on in HLG CTC +LMCSSignalType : 2 # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG +LMCSOffset : 0 # chroma residual scaling offset + +#======== Chroma QP scale ============ +WCGPPSEnable : 0 # enable WCG Chroma scale + +CbQpOffset : 0 +CrQpOffset : 0 +SameCQPTablesForAllChroma : 1 +QpInValCb : 9 23 33 42 +QpOutValCb : 9 24 33 37 diff --git a/cfg/per-sequence-HDR/H1_BalloonFestival.cfg b/cfg/per-sequence-HDR/H1_BalloonFestival.cfg index 150521a880c2e700e4a085dbf1c7944eecef4044..ad5aa2663a0a8ca924b56e382db9bd04ebee9abc 100644 --- a/cfg/per-sequence-HDR/H1_BalloonFestival.cfg +++ b/cfg/per-sequence-HDR/H1_BalloonFestival.cfg @@ -9,15 +9,3 @@ SourceHeight : 1080 # Input frame height FramesToBeEncoded : 240 # Number of frames to be coded Level : 4.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP -WCGPPSCbQpScale : 1.14 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.79 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg b/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg index 540461fa8e9a2dc715e746f3616f3bef47b5e7ef..47162c8d732f10c609199d172b1a0cb2fa0657f8 100644 --- a/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg +++ b/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg @@ -11,12 +11,5 @@ FramesToBeEncoded : 240 # Number of frames to be coded Level : 4.1 #======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear 
chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP WCGPPSCbQpScale : 1.04 # Scale factor depending on capture and representation color space WCGPPSCrQpScale : 1.39 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg b/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg index 4d1f44ef8932ffdc459d96074fa26b882a39db8b..0f0b8e4f14b575199c0e83de4055af98a4511889 100644 --- a/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg +++ b/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg @@ -9,14 +9,3 @@ SourceHeight : 1080 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded Level : 4.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP -WCGPPSCbQpScale : 1.14 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.79 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H1_EBU_Starting.cfg b/cfg/per-sequence-HDR/H1_EBU_Starting.cfg index 34505213bf9a2b06f1136744183452662b8e5e60..701df4b7e6b522d0862f9de5b8a4f5b8a9f0c33d 100644 --- a/cfg/per-sequence-HDR/H1_EBU_Starting.cfg +++ b/cfg/per-sequence-HDR/H1_EBU_Starting.cfg @@ -9,14 +9,3 @@ SourceHeight : 1080 # Input frame height FramesToBeEncoded : 500 # Number of frames to be coded Level : 4.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # 
Linear chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP -WCGPPSCbQpScale : 1.14 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.79 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H1_Market.cfg b/cfg/per-sequence-HDR/H1_Market.cfg index 56b955537bc3ca755beb5abfde571a49e846196e..c4675e6fd4af0ff5f20705ffcbfb34dccf324800 100644 --- a/cfg/per-sequence-HDR/H1_Market.cfg +++ b/cfg/per-sequence-HDR/H1_Market.cfg @@ -9,14 +9,3 @@ SourceHeight : 1080 # Input frame height FramesToBeEncoded : 400 # Number of frames to be coded Level : 4.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP -WCGPPSCbQpScale : 1.14 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.79 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H1_ShowGirl.cfg b/cfg/per-sequence-HDR/H1_ShowGirl.cfg index 2a6ff3d3d22de6c6cfdac7ad8b8d545baa3d185f..cdda6c4c10d5f80c7a26e3c8d425a7d3cb0aeb79 100644 --- a/cfg/per-sequence-HDR/H1_ShowGirl.cfg +++ b/cfg/per-sequence-HDR/H1_ShowGirl.cfg @@ -11,12 +11,11 @@ FramesToBeEncoded : 339 # Number of frames to be coded Level : 4.1 #======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear chroma QP offset mapping (scale) based on QP 
-WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP WCGPPSCbQpScale : 1.04 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.39 # Scale factor depending on capture and representation color space +WCGPPSCrQpScale : 1.39 # Scale factor depending on capture and representation color space -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR +#======== HDR Metrics ============ +CropOffsetLeft : 10 +CropOffsetTop : 10 +CropOffsetRight : -10 +CropOffsetBottom : -10 diff --git a/cfg/per-sequence-HDR/H1_SunRise.cfg b/cfg/per-sequence-HDR/H1_SunRise.cfg index 31c70dfb7a9a7992b0e74b01e63257ba7f99d344..7f6bb5b28882d05a59c8595a44ece14da03bc735 100644 --- a/cfg/per-sequence-HDR/H1_SunRise.cfg +++ b/cfg/per-sequence-HDR/H1_SunRise.cfg @@ -9,14 +9,3 @@ SourceHeight : 1080 # Input frame height FramesToBeEncoded : 200 # Number of frames to be coded Level : 4.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 1 # enable WCG Chroma scale -WCGPPSChromaQpScale : -0.46 # Linear chroma QP offset mapping (scale) based on QP -WCGPPSChromaQpOffset : 9.26 # Linear chroma QP offset mapping (offset) based on QP -WCGPPSCbQpScale : 1.14 # Scale factor depending on capture and representation color space -WCGPPSCrQpScale : 1.79 # Scale factor depending on capture and representation color space - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 1 # Change luma delta QP based on average luma -isSDR : 0 # 1: SDR in PQ container, 0: HDR diff --git a/cfg/per-sequence-HDR/H2_DayStreet.cfg b/cfg/per-sequence-HDR/H2_DayStreet.cfg index 58a92baa6581a3dbffad0f3fefc31d584c2d74af..99e77a0fe91e46fe518ce78c3cbeff3b64c8db3c 100644 --- a/cfg/per-sequence-HDR/H2_DayStreet.cfg +++ b/cfg/per-sequence-HDR/H2_DayStreet.cfg @@ -9,10 +9,3 @@ SourceHeight : 2160 # Input frame height FramesToBeEncoded : 300 # Number of frames to 
be coded Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg b/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg deleted file mode 100644 index 1ee6bf149bdcec1310f78583e87f916d484443c8..0000000000000000000000000000000000000000 --- a/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg +++ /dev/null @@ -1,18 +0,0 @@ -#======== File I/O =============== -InputFile : DayStreet_3840x2160_60p_10bit_420_hlg.yuv -InputBitDepth : 10 # Input bitdepth -InputChromaFormat : 420 # Ratio of luminance to chrominance samples -FrameRate : 60 # Frame Rate per second -FrameSkip : 0 # Number of frames to be skipped in input -SourceWidth : 3840 # Input frame width -SourceHeight : 2160 # Input frame height -FramesToBeEncoded : 600 # Number of frames to be coded - -Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg b/cfg/per-sequence-HDR/H2_FlyingBirds2.cfg similarity index 73% rename from cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg rename to cfg/per-sequence-HDR/H2_FlyingBirds2.cfg index f029fea88c93367b4fbf3dd38bc811e79c58cdba..18f8fb078878b7a313402f03ff5226e16d3f79c9 100644 --- a/cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg +++ b/cfg/per-sequence-HDR/H2_FlyingBirds2.cfg @@ -9,10 +9,3 @@ SourceHeight : 2160 # Input frame height FramesToBeEncoded : 300 # Number of frames to be coded Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg 
b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg index 67db9625ae551783cac173ccd3b542ab288ac4c2..9412ad6837f8765d4bfc722e1ad9d1fefa5b74fa 100644 --- a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg +++ b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg @@ -9,10 +9,3 @@ SourceHeight : 2160 # Input frame height FramesToBeEncoded : 300 # Number of frames to be coded Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg deleted file mode 100644 index 18f585160328263f3a6ea32188d5e8465490f70c..0000000000000000000000000000000000000000 --- a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg +++ /dev/null @@ -1,18 +0,0 @@ -#======== File I/O =============== -InputFile : PeopleInShoppingCenter_3840x2160_60p_10bit_420_hlg.yuv -InputBitDepth : 10 # Input bitdepth -InputChromaFormat : 420 # Ratio of luminance to chrominance samples -FrameRate : 60 # Frame Rate per second -FrameSkip : 0 # Number of frames to be skipped in input -SourceWidth : 3840 # Input frame width -SourceHeight : 2160 # Input frame height -FramesToBeEncoded : 600 # Number of frames to be coded - -Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg b/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg index 50fa5c60946351a767b47dba980c6cb9987b86b3..c673c0e9e890295fb5af84cb8f0f08c69bab1dc0 100644 --- a/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg +++ b/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg @@ -9,9 +9,3 @@ SourceHeight : 2160 # Input frame height FramesToBeEncoded : 300 # Number of frames to be coded Level : 5.1 - 
-#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma \ No newline at end of file diff --git a/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg b/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg deleted file mode 100644 index d85a664678cd714c9acf9e1f54312492228a6fc8..0000000000000000000000000000000000000000 --- a/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg +++ /dev/null @@ -1,18 +0,0 @@ -#======== File I/O =============== -InputFile : SunsetBeach2_3840x2160p_60_10b_HLG_420.yuv -InputBitDepth : 10 # Input bitdepth -InputChromaFormat : 420 # Ratio of luminance to chrominance samples -FrameRate : 60 # Frame Rate per second -FrameSkip : 0 # Number of frames to be skipped in input -SourceWidth : 3840 # Input frame width -SourceHeight : 2160 # Input frame height -FramesToBeEncoded : 600 # Number of frames to be coded - -Level : 5.1 - -#======== Chroma QP scale ============= -WCGPPSEnable : 0 # enable WCG Chroma scale - -# ======== Luma adaptive QP ======== -LumaLevelToDeltaQPMode : 0 # Change luma delta QP based on average luma - diff --git a/cfg/per-sequence/Robot_444.cfg b/cfg/per-sequence/Robot_444.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e8495c94387ce92c117befb66b91275c3a60d939 --- /dev/null +++ b/cfg/per-sequence/Robot_444.cfg @@ -0,0 +1,11 @@ +#======== File I/O =============== +InputFile : sc_robot_1280x720_30_8bit_300_444.yuv +InputBitDepth : 8 # Input bitdepth +InputChromaFormat : 444 # Ratio of luminance to chrominance samples +FrameRate : 30 # Frame Rate per second +FrameSkip : 0 # Number of frames to be skipped in input +SourceWidth : 1280 # Input frame width +SourceHeight : 720 # Input frame height +FramesToBeEncoded : 300 # Number of frames to be coded + +Level : 6.2 diff --git a/cfg/per-sequence/Robot_RGB.cfg b/cfg/per-sequence/Robot_RGB.cfg new file mode 100644 index 
0000000000000000000000000000000000000000..6fc4be981268717be82746b1119b1431680b034a --- /dev/null +++ b/cfg/per-sequence/Robot_RGB.cfg @@ -0,0 +1,14 @@ +#======== File I/O =============== +InputFile : sc_robot_1280x720_30_8bit_300.rgb +InputBitDepth : 8 # Input bitdepth +InputChromaFormat : 444 # Ratio of luminance to chrominance samples +FrameRate : 30 # Frame Rate per second +FrameSkip : 0 # Number of frames to be skipped in input +SourceWidth : 1280 # Input frame width +SourceHeight : 720 # Input frame height +FramesToBeEncoded : 300 # Number of frames to be coded +InputColourSpaceConvert : RGBtoGBR # Non-normative colour space conversion to apply to input video +SNRInternalColourSpace : 1 # Evaluate SNRs in GBR order +OutputInternalColourSpace : 0 # Convert recon output back to RGB order. Use --OutputColourSpaceConvert GBRtoRGB on decoder to produce a matching output file. + +Level : 6.2 diff --git a/cfg/rpr/scale1.5x.cfg b/cfg/rpr/scale1.5x.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f9733d0dbc11f436fcc50799d8abc7f91b2238d5 --- /dev/null +++ b/cfg/rpr/scale1.5x.cfg @@ -0,0 +1,6 @@ +# Reference picture resampling CE settings for scaling ratio and number of encoded frames + +ScalingRatioHor : 1.5 +ScalingRatioVer : 1.5 +FractionNumFrames : 0.5 +UpscaledOutput : 1 \ No newline at end of file diff --git a/cfg/rpr/scale2.0x.cfg b/cfg/rpr/scale2.0x.cfg new file mode 100644 index 0000000000000000000000000000000000000000..f0b9a6e92b924879d7fc5dfbe3be46c409c30f5e --- /dev/null +++ b/cfg/rpr/scale2.0x.cfg @@ -0,0 +1,6 @@ +# Reference picture resampling CE settings for scaling ratio and number of encoded frames + +ScalingRatioHor : 2.0 +ScalingRatioVer : 2.0 +FractionNumFrames : 0.5 +UpscaledOutput : 1 diff --git a/cfg/sei_vui/alternative_transfer_characteristics.cfg b/cfg/sei_vui/alternative_transfer_characteristics.cfg new file mode 100644 index 0000000000000000000000000000000000000000..8b72d975f659d2bb076ccbe34d300528e7d8c8df --- /dev/null 
+++ b/cfg/sei_vui/alternative_transfer_characteristics.cfg @@ -0,0 +1,2 @@ +#======== Alternative transfer characteristics SEI message ===================== +SEIPreferredTransferCharacterisics : 18 diff --git a/cfg/sei_vui/ambient_viewing_environment.cfg b/cfg/sei_vui/ambient_viewing_environment.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9ac5c6a24a30a4bd90f5f000ccc88344ed842c84 --- /dev/null +++ b/cfg/sei_vui/ambient_viewing_environment.cfg @@ -0,0 +1,5 @@ +#======== Ambient viewing environment SEI message ===================== +SEIAVEEnabled : 1 +SEIAVEAmbientIlluminance : 100000 +SEIAVEAmbientLightX : 15635 +SEIAVEAmbientLightY : 16450 diff --git a/cfg/sei_vui/content_colour_volume.cfg b/cfg/sei_vui/content_colour_volume.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b4ea22a809c0d2dc7f22c53a99b49abf86b55e80 --- /dev/null +++ b/cfg/sei_vui/content_colour_volume.cfg @@ -0,0 +1,17 @@ +#======== Content Colour Volume SEI message ===================== +SEICCVEnabled : 1 +SEICCVCancelFlag : 0 +SEICCVPersistenceFlag : 1 +SEICCVPrimariesPresent : 1 +m_ccvSEIPrimariesX0 : 0.300 +m_ccvSEIPrimariesY0 : 0.600 +m_ccvSEIPrimariesX1 : 0.150 +m_ccvSEIPrimariesY1 : 0.060 +m_ccvSEIPrimariesX2 : 0.640 +m_ccvSEIPrimariesY2 : 0.330 +SEICCVMinLuminanceValuePresent : 1 +SEICCVMinLuminanceValue : 0.0 +SEICCVMaxLuminanceValuePresent : 1 +SEICCVMaxLuminanceValue : 0.1 +SEICCVAvgLuminanceValuePresent : 1 +SEICCVAvgLuminanceValue : 0.01 diff --git a/cfg/sei_vui/content_light_level.cfg b/cfg/sei_vui/content_light_level.cfg new file mode 100644 index 0000000000000000000000000000000000000000..97129b5d56eedd7b44dab7cdf50c5c5b7dc01a5d --- /dev/null +++ b/cfg/sei_vui/content_light_level.cfg @@ -0,0 +1,4 @@ +#======== Content Light Level SEI message ===================== +SEICLLEnabled : 1 +SEICLLMaxContentLightLevel : 4000 +SEICLLMaxPicAvgLightLevel : 0 diff --git a/cfg/sei_vui/equirectangular.cfg b/cfg/sei_vui/equirectangular.cfg new file 
mode 100755 index 0000000000000000000000000000000000000000..c448477dded8aed2ed548d9d912225f73bc1adce --- /dev/null +++ b/cfg/sei_vui/equirectangular.cfg @@ -0,0 +1,9 @@ +#======== Equirectangular Projection SEI message ===================== +SEIErpEnabled : 1 +SEIErpCancelFlag : 0 +SEIErpPersistenceFlag : 1 +SEIErpGuardBandFlag : 1 +SEIErpGuardBandType : 0 +SEIErpLeftGuardBandWidth : 254 +SEIErpRightGuardBandWidth : 254 + diff --git a/cfg/sei_vui/film_grain_characterstics.cfg b/cfg/sei_vui/film_grain_characterstics.cfg new file mode 100644 index 0000000000000000000000000000000000000000..d8c9c739fc3c7d9597fad4246fe61c879d25a823 --- /dev/null +++ b/cfg/sei_vui/film_grain_characterstics.cfg @@ -0,0 +1,11 @@ +#======== Film grain characteristics SEI message ===================== +SEIFGCEnabled : 1 +SEIFGCCancelFlag : 0 +SEIFGCPersistenceFlag : 1 +SEIFGCModelID : 0 # 0: frequency filtering; 1: auto-regression; 2-3 are reserved +SEIFGCSepColourDescPresentFlag : 0 # if not 0, need to specify separate colour description (not implemented in current encoder cmd line) +SEIFGCBlendingModeID : 0 # 0: additive; 1: multiplicative +SEIFGCLog2ScaleFactor : 0 +SEIFGCCompModelPresentComp0 : 0 # if not 0, need to specify model for comp 0 (not implemented in current encoder cmd line) +SEIFGCCompModelPresentComp1 : 0 # if not 0, need to specify model for comp 1 (not implemented in current encoder cmd line) +SEIFGCCompModelPresentComp2 : 0 # if not 0, need to specify model for comp 2 (not implemented in current encoder cmd line) diff --git a/cfg/sei_vui/frame_packing.cfg b/cfg/sei_vui/frame_packing.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0a8406dcfcb507d7099067121717f2dc6a25a860 --- /dev/null +++ b/cfg/sei_vui/frame_packing.cfg @@ -0,0 +1,6 @@ +#======== Frame Packing SEI message ===================== +SEIFramePacking : 1 +SEIFramePackingId : 0 +SEIFramePackingType : 3 +SEIFramePackingQuincunx : 1 +SEIFramePackingInterpretation : 0 diff --git
a/cfg/sei_vui/generalized_cubemap_projection.cfg b/cfg/sei_vui/generalized_cubemap_projection.cfg new file mode 100644 index 0000000000000000000000000000000000000000..82cd6d8a491d7df90875bf3cd40bd1b74cce9a83 --- /dev/null +++ b/cfg/sei_vui/generalized_cubemap_projection.cfg @@ -0,0 +1,16 @@ +#======== Generalized Cubemap Projection SEI message ===================== +SEIGcmpEnabled : 1 +SEIGcmpCancelFlag : 0 +SEIGcmpPersistenceFlag : 1 +SEIGcmpPackingType : 2 +SEIGcmpMappingFunctionType : 2 +SEIGcmpFaceIndex : 5 0 4 3 1 2 +SEIGcmpFaceRotation : 2 2 2 2 2 2 +SEIGcmpFunctionCoeffU : 0.28 0.28 0.28 0.28 0.28 0.28 +SEIGcmpFunctionUAffectedByVFlag : 0 0 0 0 0 0 +SEIGcmpFunctionCoeffV : 0.4 0.4 0.4 0.28 0.4 0.28 +SEIGcmpFunctionVAffectedByUFlag : 1 1 1 0 1 0 +SEIGcmpGuardBandFlag : 1 +SEIGcmpGuardBandBoundaryType : 1 +SEIGcmpGuardBandSamplesMinus1 : 15 + diff --git a/cfg/sei_vui/mastering_display_colour_volume.cfg b/cfg/sei_vui/mastering_display_colour_volume.cfg new file mode 100644 index 0000000000000000000000000000000000000000..4566d40061a53fd4f8cbaee4eabe44176f0f2584 --- /dev/null +++ b/cfg/sei_vui/mastering_display_colour_volume.cfg @@ -0,0 +1,6 @@ +#======== Mastering Display Colour Volume SEI message ===================== +SEIMasteringDisplayColourVolume : 1 +SEIMasteringDisplayMaxLuminance : 10000 +SEIMasteringDisplayMinLuminance : 0 +SEIMasteringDisplayPrimaries : 0 50000 0 0 50000 0 +SEIMasteringDisplayWhitePoint : 16667 16667 diff --git a/cfg/sei_vui/omni_viewport.cfg b/cfg/sei_vui/omni_viewport.cfg new file mode 100755 index 0000000000000000000000000000000000000000..8a61f4db61a7b44ff918c522c0571fc8387d7814 --- /dev/null +++ b/cfg/sei_vui/omni_viewport.cfg @@ -0,0 +1,12 @@ +#======== Omni Viewport SEI message ===================== +SEIOmniViewportEnabled : 1 +SEIOmniViewportId : 0 +SEIOmniViewportCancelFlag : 0 +SEIOmniViewportPersistenceFlag : 1 +SEIOmniViewportCntMinus1 : 2 +SEIOmniViewportAzimuthCentre : -5898240 5898240 0 +SEIOmniViewportElevationCentre : 
-5898240 5898240 0 +SEIOmniViewportTiltCentre : -11796480 5898240 0 +SEIOmniViewportHorRange : 2949120 2949120 2949120 +SEIOmniViewportVerRange : 2949120 2949120 2949120 + diff --git a/cfg/sei_vui/region_wise_packing.cfg b/cfg/sei_vui/region_wise_packing.cfg new file mode 100755 index 0000000000000000000000000000000000000000..7f04516dfccb58497fa687ce0b28483eefa0d110 --- /dev/null +++ b/cfg/sei_vui/region_wise_packing.cfg @@ -0,0 +1,27 @@ +#======== Region-wise packing SEI message ===================== +SEIRwpEnabled : 1 +SEIRwpCancelFlag : 0 +SEIRwpPersistenceFlag : 1 +SEIRwpConstituentPictureMatchingFlag : 1 +SEIRwpNumPackedRegions : 6 +SEIRwpProjPictureWidth : 2048 +SEIRwpProjPictureHeight : 1024 +SEIRwpPackedPictureWidth : 1536 +SEIRwpPackedPictureHeight : 768 +SEIRwpTransformType : 0 1 2 3 4 5 +SEIRwpGuardBandFlag : 1 1 1 1 1 1 +SEIRwpProjRegionWidth : 512 512 512 512 512 512 +SEIRwpProjRegionHeight : 128 128 128 128 128 128 +SEIRwpProjRegionTop : 256 256 256 256 256 256 +SEIRwpProjRegionLeft : 0 256 512 768 1024 1280 +SEIRwpPackedRegionWidth : 512 512 512 384 384 384 +SEIRwpPackedRegionHeight : 128 128 128 192 192 192 +SEIRwpPackedRegionTop : 64 64 64 64 64 64 +SEIRwpPackedRegionLeft : 32 32 32 32 32 32 +SEIRwpLeftGuardBandWidth : 64 64 64 64 64 64 +SEIRwpRightGuardBandWidth : 128 128 128 128 128 128 +SEIRwpTopGuardBandHeight : 192 192 192 192 192 192 +SEIRwpBottomGuardBandHeight : 255 255 255 255 255 255 +SEIRwpGuardBandNotUsedForPredFlag : 1 1 1 1 1 1 +SEIRwpGuardBandType : 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 0 0 0 0 1 1 1 1 + diff --git a/cfg/sei_vui/sample_aspect_ratio.cfg b/cfg/sei_vui/sample_aspect_ratio.cfg new file mode 100755 index 0000000000000000000000000000000000000000..b33547473cf6062f619b684f5061293c9cc1b15f --- /dev/null +++ b/cfg/sei_vui/sample_aspect_ratio.cfg @@ -0,0 +1,6 @@ +SEISampleAspectRatioInfo: 1 +SEISARICancelFlag: 0 +SEISARIPersistenceFlag: 1 +SEISARIAspectRatioIdc: 255 +SEISARISarWidth: 1 +SEISARISarHeight: 1 diff --git 
a/cfg/sei_vui/sphere_rotation.cfg b/cfg/sei_vui/sphere_rotation.cfg new file mode 100755 index 0000000000000000000000000000000000000000..be144e63b058a18465ad0c6a469ab8f0a2074d73 --- /dev/null +++ b/cfg/sei_vui/sphere_rotation.cfg @@ -0,0 +1,8 @@ +#======== Sphere Rotation SEI message ===================== +SEISphereRotationEnabled : 1 +SEISphereRotationCancelFlag : 0 +SEISphereRotationPersistenceFlag : 1 +SEISphereRotationYaw : -5898240 +SEISphereRotationPitch : -5898240 +SEISphereRotationRoll : -11796480 + diff --git a/cfg/sei_vui/subpicture_level.cfg b/cfg/sei_vui/subpicture_level.cfg new file mode 100755 index 0000000000000000000000000000000000000000..46a3dce7aa0fcf354ec5a6a973acdfe47d6c2161 --- /dev/null +++ b/cfg/sei_vui/subpicture_level.cfg @@ -0,0 +1,2 @@ +SEISubpicureLevelInfo: 1 + diff --git a/cfg/sei_vui/timing.cfg b/cfg/sei_vui/timing.cfg new file mode 100755 index 0000000000000000000000000000000000000000..63f37a73919737867258e173f4c611176003d05d --- /dev/null +++ b/cfg/sei_vui/timing.cfg @@ -0,0 +1,4 @@ +SEIBufferingPeriod: 1 +SEIPictureTiming: 1 +VuiParametersPresent: 1 +RCCpbSize: 2000 diff --git a/cfg/sei_vui/vui_HD.cfg b/cfg/sei_vui/vui_HD.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6705b1da5f934e0d1d41b4d9bf627a1af0b97073 --- /dev/null +++ b/cfg/sei_vui/vui_HD.cfg @@ -0,0 +1,17 @@ +VuiParametersPresent: 1 // enable VUI +AspectRatioInfoPresent: 1 // enable presence of sample aspect ratio information +AspectRatioIdc: 1 // sample aspect ratio pre-defined types according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +SarWidth: 1 // sample aspect ratio width, if AspectRatioIdc is equal to 255 +SarHeight: 1 // sample aspect ratio height, if AspectRatioIdc is equal to 255 +ColourDescriptionPresent: 1 // enable presence of colour description information +ColourPrimaries: 1 // the source colour primaries according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +TransferCharacteristics: 1 // transfer characteristics function according to Rec. 
ITU-T H.273 | ISO/IEC 23091-2 +MatrixCoefficients: 1 // the formulae used in deriving luma and chroma signals acc. to Rec. ITU-T H.273 | ISO/IEC 23091-2 +VideoFullRange: 0 // scaling and offset values applied according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +ChromaLocInfoPresent: 1 // enable presence of chroma location information +ChromaSampleLocTypeTopField: 0 // the location of chroma sample top field +ChromaSampleLocTypeBottomField: 0 // the location of chroma sample bottom field +ChromaSampleLocType: 0 // the location of chroma sample frame +OverscanInfoPresent: 1 // enable presence of overscan information +OverscanAppropriate: 0 // indicates if the cropped decoded pictures output are suitable for display using overscan + diff --git a/cfg/sei_vui/vui_UHD_PQ.cfg b/cfg/sei_vui/vui_UHD_PQ.cfg new file mode 100644 index 0000000000000000000000000000000000000000..717815029e845436e2f792e9b0d6015c9014e610 --- /dev/null +++ b/cfg/sei_vui/vui_UHD_PQ.cfg @@ -0,0 +1,17 @@ +VuiParametersPresent: 1 // enable VUI +AspectRatioInfoPresent: 1 // enable presence of sample aspect ratio information +AspectRatioIdc: 1 // sample aspect ratio pre-defined types according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +SarWidth: 1 // sample aspect ratio width, if AspectRatioIdc is equal to 255 +SarHeight: 1 // sample aspect ratio height, if AspectRatioIdc is equal to 255 +ColourDescriptionPresent: 1 // enable presence of colour description information +ColourPrimaries: 9 // the source colour primaries according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +TransferCharacteristics: 16 // transfer characteristics function according to Rec. ITU-T H.273 | ISO/IEC 23091-2 +MatrixCoefficients: 9 // the formulae used in deriving luma and chroma signals acc. to Rec. ITU-T H.273 | ISO/IEC 23091-2 +VideoFullRange: 0 // scaling and offset values applied according to Rec. 
ITU-T H.273 | ISO/IEC 23091-2 +ChromaLocInfoPresent: 1 // enable presence of chroma location information +ChromaSampleLocTypeTopField: 2 // the location of chroma sample top field +ChromaSampleLocTypeBottomField: 2 // the location of chroma sample bottom field +ChromaSampleLocType: 2 // the location of chroma sample frame +OverscanInfoPresent: 1 // enable presence of overscan information +OverscanAppropriate: 0 // indicates if the cropped decoded pictures output are suitable for display using overscan + diff --git a/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py b/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py index eb351881c9571586e19549ccacfbbf37fb571abc..f231c70c4f0eb392f86cc0daab632ea994a3dc5a 100755 --- a/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py +++ b/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py @@ -11,6 +11,7 @@ import sys import pyhhi.build.common.system as system import pyhhi.build.common.util as util import pyhhi.build.common.ver as ver +import pyhhi.build.common.bldtools as bldtools import pyhhi.build.cmksupp as cmksupp from pyhhi.build.common.bldtools import BuildScriptInstaller from pyhhi.build.common.error import InvalidCommandLineArgumentError @@ -24,7 +25,7 @@ class CMakeLauncherApp(object): self._cmake_launcher = None self._dict_generator_choice = {'linux': ['umake', 'ninja'], 'macosx': ['xcode', 'umake', 'ninja'], - 'windows': ['vs15', 'vs14', 'vs12', 'vs11', 'vs10', 'umake', 'mgwmake', 'ninja']} + 'windows': ['vs16', 'vs15', 'vs14', 'vs12', 'vs11', 'vs10', 'umake', 'mgwmake', 'ninja']} self._top_dir = None self._cmake_mod_list = ['pyhhi.build.app.cmk', 'pyhhi.build.cmkfnd', @@ -93,14 +94,14 @@ class CMakeLauncherApp(object): %(prog)s [options] [variant=debug,release,relwithdebinfo,minsizerel] [link=static,shared] [toolset=<toolset_spec>] [address-model=32] %(prog)s is a script front end to cmake to simplify its usage on Linux, -Windows, MacOSX using cmake's generators "Unix Makefiles", "Xcode" and -"Visual Studio 15 - Visual Studio 10" and its 
compilers. +Windows, MacOSX using cmake's generators "Unix Makefiles", "Ninja", "Xcode" and +"Visual Studio 16 - Visual Studio 10" and its compilers. arguments: variant: debug if not specified link: static if not specified toolset: default c++ compiler if not specified - examples/windows: msvc-19.13, msvc-19.0, msvc-18.0, msvc-17.0, msvc-16.0, intel, gcc + examples/windows: msvc-19.1x, msvc-19.0, msvc-18.0, msvc-17.0, msvc-16.0, intel, gcc examples/linux: gcc-4.9, gcc-5, gcc-6, clang, intel address-model=32: windows: builds 32 bit binaries instead of 64 bit binaries @@ -131,7 +132,7 @@ usage examples: parser.add_argument("-g", "-G", action="store", dest="generator", choices=self._dict_generator_choice[self._sys_info.get_platform()], help="""specify a cmake generator the script has special support for. - Supported generators: ninja, umake, mgwmake, vs15, vs14, vs12, vs11, vs10, xcode. + Supported generators: ninja, umake, mgwmake, vs16, vs15, vs14, vs12, vs11, vs10, xcode. The choices accepted are platform and installation dependent. The environment variable DEFAULT_CMAKE_GENERATOR may be used to override the default value.""") @@ -155,13 +156,14 @@ usage examples: parser.add_argument("--clean-first", action="store_true", dest="clean_first", default=False, help="build target clean first, then build the active target.") - parser.add_argument("--verbosity", action="store", dest="build_verbosity", choices=['quiet', 'minimal', 'normal', 'detailed', 'diagnostic'], default='minimal', - help="specify msbuild verbosity level [default: %(default)s].") + parser.add_argument("--verbosity", action="store", dest="build_verbosity", choices=['cmake', 'quiet', 'minimal', 'normal', 'detailed', 'diagnostic'], default='minimal', + help="""specify (ms)build verbosity level [default: %(default)s]. 
+ The choice 'cmake' requires cmake 3.14.x or higher to increase build verbosity for Visual Studio and other generators.""") util.app_args_add_log_level(parser) g = parser.add_argument_group("advanced options") - g.add_argument("-i", action="store", dest="install_dir", nargs='?', const=os.path.join(self._sys_info.get_home_dir(), 'bin'), + g.add_argument("-i", action="store", dest="install_dir", nargs='?', const=os.path.join(self._sys_info.get_home_dir(native=True), 'bin'), help="install this script and exit. The default destination directory is %(const)s.") g.add_argument("--py-cache-clean", action="store", dest="py_cache_dirs", nargs='+', @@ -282,11 +284,26 @@ usage examples: # looks like a cross compiler specification which requires a toolchain file matching the toolset spec and the linux system. toolset_spec_norm = self._find_toolchain_file(toolset_spec_norm) elif self._sys_info.is_windows(): - # msvc-19.00 -> normalized to 19.0 - re_match = re.match(r'msvc-(\d+)\.(\d+)', toolset_spec) - if re_match: - minor_version = int(re_match.group(2)) - toolset_spec_norm = "msvc-{0}.{1:d}".format(re_match.group(1), minor_version) + if toolset_spec.startswith('msvc-'): + msvc_registry = bldtools.MsvcRegistry() + if toolset_spec == 'msvc-19.2x': + if msvc_registry.is_version_installed((14, 2)): + cl_version = msvc_registry.get_compiler_version((14, 2)) + toolset_spec_norm = "msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1]) + else: + raise InvalidCommandLineArgumentError("toolset={} not available.".format(toolset_spec)) + elif toolset_spec == 'msvc-19.1x': + if msvc_registry.is_version_installed((14, 1)): + cl_version = msvc_registry.get_compiler_version((14, 1)) + toolset_spec_norm = "msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1]) + else: + raise InvalidCommandLineArgumentError("toolset={} not available.".format(toolset_spec)) + else: + # msvc-19.00 -> normalized to 19.0 + re_match = re.match(r'msvc-(\d+)\.(\d+)', toolset_spec) + if re_match: + minor_version = 
int(re_match.group(2)) + toolset_spec_norm = "msvc-{0}.{1:d}".format(re_match.group(1), minor_version) elif self._sys_info.is_macosx(): pass else: diff --git a/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py b/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py index 078586a78187fdc7b183989ee300e7ba80b86c3b..07754e6e03143caab41b35e0fe4987dce3b5a1c2 100755 --- a/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py +++ b/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py @@ -155,6 +155,7 @@ class CMakeLauncher(object): 'mgwmake': 'MinGW Makefiles', 'ninja': 'Ninja', 'xcode': 'Xcode', + 'vs16': 'Visual Studio 16 2019', 'vs15': 'Visual Studio 15 2017', 'vs14': 'Visual Studio 14 2015', 'vs12': 'Visual Studio 12 2013', @@ -178,13 +179,30 @@ class CMakeLauncher(object): 'vs12': ['msvc-18.0', 'msvc-17.0', 'msvc-16.0'], 'vs11': ['msvc-17.0', 'msvc-16.0'], 'vs10': ['msvc-16.0']} + # vs15 has not a fixed compiler version and therefore the mapping is generated dynamically. if self._msvc_registry.is_version_installed((14, 1)): cl_version = self._msvc_registry.get_compiler_version((14, 1)) msvc_str = 'msvc-' + ver.version_tuple_to_str(cl_version[:2]) if cl_version[1] < 20: self._dict_to_vs_platform_toolset[msvc_str] = 'v141' - self._dict_generator_alias_to_msvc_toolsets['vs15'] = [msvc_str, 'msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0'] + if not self._msvc_registry.is_vs2019_toolset((14, 1)): + self._dict_generator_alias_to_msvc_toolsets['vs15'] = [msvc_str, 'msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0'] + else: + assert False + + # vs16 has not a fixed compiler version and therefore the mapping is generated dynamically. 
+ if self._msvc_registry.is_version_installed((14, 2)): + cl_version = self._msvc_registry.get_compiler_version((14, 2)) + msvc_str = 'msvc-' + ver.version_tuple_to_str(cl_version[:2]) + if cl_version[1] < 30: + self._dict_to_vs_platform_toolset[msvc_str] = 'v142' + msvc_version_list = [msvc_str] + if self._msvc_registry.is_version_installed((14, 1)): + cl_version = self._msvc_registry.get_compiler_version((14, 1)) + msvc_version_list.append("msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1])) + msvc_version_list.extend(['msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0']) + self._dict_generator_alias_to_msvc_toolsets['vs16'] = msvc_version_list else: assert False @@ -259,38 +277,43 @@ class CMakeLauncher(object): vs_toolset = "Intel C++ Compiler %d.%d" % (compiler_info.version_major_minor[0], compiler_info.version_major_minor[1]) else: assert False - cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias], - '-T', vs_toolset, - '-A', self._dict_to_vs_platform_name[compiler_info.target_arch]] + cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias]] + if generator_alias == 'vs16': + if ver.version_compare(compiler_info.version_major_minor, (19, 20)) < 0: + cmake_argv.extend(['-T', self._dict_to_vs_platform_toolset['msvc-' + ver.version_tuple_to_str(compiler_info.version_major_minor)]]) + if compiler_info.target_arch != 'x86_64': + cmake_argv.extend(['-A', self._dict_to_vs_platform_name[compiler_info.target_arch]]) + else: + cmake_argv.extend(['-T', vs_toolset, '-A', self._dict_to_vs_platform_name[compiler_info.target_arch]]) + elif generator_alias == 'xcode': cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias]] elif generator_alias in ['umake', 'mgwmake', 'ninja']: cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias], - '-DCMAKE_BUILD_TYPE=' + self._dict_to_cmake_config[cfg]] + '-DCMAKE_BUILD_TYPE:STRING=' + self._dict_to_cmake_config[cfg]] if compiler_info.is_cross_compiler(): - 
cmake_argv.append('-DCMAKE_TOOLCHAIN_FILE=' + compiler_info.cmake_toolchain_file) + cmake_argv.append('-DCMAKE_TOOLCHAIN_FILE:FILEPATH=' + compiler_info.cmake_toolchain_file) else: if compiler_info.cmake_cxx_compiler: - cmake_argv.append('-DCMAKE_CXX_COMPILER=' + compiler_info.cmake_cxx_compiler) + cmake_argv.append('-DCMAKE_CXX_COMPILER:FILEPATH=' + compiler_info.cmake_cxx_compiler) if compiler_info.cmake_c_compiler: - cmake_argv.append('-DCMAKE_C_COMPILER=' + compiler_info.cmake_c_compiler) + cmake_argv.append('-DCMAKE_C_COMPILER:FILEPATH=' + compiler_info.cmake_c_compiler) if cmake_argv_optional: # Add any additional arguments to the cmake command line. cmake_argv.extend(cmake_argv_optional) if lnk_variant == 'shared': - cmake_argv.append('-DBUILD_SHARED_LIBS=1') + cmake_argv.append('-DBUILD_SHARED_LIBS:BOOL=ON') if self._is_multi_configuration_generator(): cmake_config_types = [self._dict_to_cmake_config[x] for x in self._default_config_types] for b_cfg in build_configs: if b_cfg not in self._default_config_types: cmake_config_types.append(self._dict_to_cmake_config[b_cfg]) - cmake_argv.append('-DCMAKE_CONFIGURATION_TYPES=' + ';'.join(cmake_config_types)) + cmake_argv.append('-DCMAKE_CONFIGURATION_TYPES:STRING=' + ';'.join(cmake_config_types)) # cmake_argv.append(self._top_dir) # print("launch_config(): cmake_args", cmake_argv) # print("build dir:", b_dir) # print("top dir:", self._top_dir) - if (not self._sys_info.is_windows()) and (ver.version_compare(self._cmake_finder.get_cmake_version(), (3, 13, 0)) >= 0): - # Not done for windows yet avoiding potential issues with command line length limits. + if ver.version_compare(self._cmake_finder.get_cmake_version(), (3, 13, 0)) >= 0: cmake_argv.extend(['-S', self._top_dir, '-B', b_dir]) retv = self.launch_cmake(cmake_argv) else: @@ -410,9 +433,11 @@ class CMakeLauncher(object): elif self._sys_info.get_platform() == 'macosx': generator_alias = 'xcode' elif self._sys_info.get_platform() == 'windows': - # e.g. 
14.1, 14.0, 12.0 etc. + # e.g. 14.2, 14.1, 14.0, 12.0 etc. bb_vs_latest_version = self._msvc_registry.get_latest_version() - if ver.version_compare(bb_vs_latest_version, (14,1)) == 0: + if ver.version_compare(bb_vs_latest_version, (14, 2)) == 0: + generator_alias = 'vs16' + elif ver.version_compare(bb_vs_latest_version, (14, 1)) == 0: generator_alias = 'vs15' else: generator_alias = 'vs' + str(bb_vs_latest_version[0]) @@ -565,8 +590,20 @@ class CMakeLauncher(object): self._add_cmake_build_tool_options(cmake_argv, ['-parallelizeTargets', '-jobs', str(build_jobs)]) def _add_cmake_build_verbosity_option(self, cmake_argv, generator_alias, verbosity_level): - if generator_alias.startswith('vs'): - self._add_cmake_build_tool_options(cmake_argv, ['/verbosity:' + verbosity_level]) + if verbosity_level == 'cmake': + cmake_version = self._cmake_finder.get_cmake_version() + if ver.version_compare(cmake_version, (3, 14)) >= 0: + # self._add_cmake_build_tool_options(cmake_argv, ['-v']) + # -v is a cmake option and not a build tool option and therefore + # it has to be inserted left of '--' + if '--' in cmake_argv: + index = cmake_argv.index('--') + cmake_argv.insert(index, '-v') + else: + cmake_argv.append('-v') + else: + if generator_alias.startswith('vs'): + self._add_cmake_build_tool_options(cmake_argv, ['/verbosity:' + verbosity_level]) def _add_cmake_build_tool_options(self, cmake_argv, build_tool_options): if not build_tool_options: diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py b/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py index 7db8ae7f1e1f4497ae2203e7114d1ca0578d5c02..bbb6f70b925c50470b04c637fae4faa5f16371b0 100755 --- a/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py +++ b/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py @@ -23,33 +23,51 @@ class MsvcRegistry(object): def __init__(self): self._logger = logging.getLogger(__name__) self._sys_info = system.SystemInfo() - self._supported_msvc_versions = ['14.1', '14.0', '12.0', '11.0', 
'10.0'] + self._supported_msvc_versions = ['14.2', '14.1', '14.0', '12.0', '11.0', '10.0'] program_dir = self._sys_info.get_program_dir('x86') - self._msvc_install_dir_dict = {'14.1': [os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Enterprise', 'VC'), - os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Professional', 'VC'), - os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Community', 'VC')], - '14.0': [os.path.join(program_dir, "Microsoft Visual Studio 14.0", 'VC')], + # VS2019, VS2017 come with a locator tool vswhere to search for the installation directory. + # The dictionary _msvc_install_dir_dict will be augmented with keys 14.2 and 14.1 by method _do_inventory_vc14x(). + self._msvc_install_dir_dict = {'14.0': [os.path.join(program_dir, "Microsoft Visual Studio 14.0", 'VC')], '12.0': [os.path.join(program_dir, "Microsoft Visual Studio 12.0", 'VC')], '11.0': [os.path.join(program_dir, "Microsoft Visual Studio 11.0", 'VC')], '10.0': [os.path.join(program_dir, "Microsoft Visual Studio 10.0", 'VC')]} - # a list of sorted version tuples identifying the installed MSVC products self._installed_msvc_versions = [] # key = msvc_version, value = full path of vcvarsall.bat self._compiler_command_dict = {} - # key = msvc_version, value = options to be passed to the setup command; e.g. -vcvars_ver=14.0 + # key = msvc_version, value = options to be passed to the setup command; e.g. -vcvars_ver=14.0, -vcvars_ver=14.1x self._compiler_command_option_dict = {} # key = msvc_version, value = vc version self._compiler_version_dict = {} # key = msvc_version, value = True/False self._is_vs2017_toolset_dict = {} + # key = msvc_version, value = True/False; e.g. '14.1' -> True indicates 14.1 is an alternative toolset installed with vs2019. 
+ self._is_vs2019_toolset_dict = {} + # clear information on alternative toolset upfront + for version in self._supported_msvc_versions: + self._is_vs2017_toolset_dict[version] = False + self._is_vs2019_toolset_dict[version] = False if self._logger.isEnabledFor(logging.DEBUG): self._logger.debug("performing in-depth VS inventory for debugging.") self._do_inventory_winreg() - # Update VS2017 installation paths via vswhere.exe - self._do_inventory_vc141() + vswhere = self._find_vswhere() + if vswhere: + # Update VS2019 installation paths via vswhere.exe + self._do_inventory_vc14x('14.2', vswhere) + # Update VS2017 installation paths via vswhere.exe + self._do_inventory_vc14x('14.1', vswhere) + else: + pass self._do_inventory() + self._dump_inventory() + + def _dump_inventory(self): + if self._logger.isEnabledFor(logging.DEBUG): + for version in self._installed_msvc_versions: + version_str = ver.version_tuple_to_str(version) + cl_version_str = ver.version_tuple_to_str(self._compiler_version_dict[version_str]) + self._logger.debug("found MSVC version {}, CL version {}, setup={}".format(version_str, cl_version_str, self._compiler_command_dict[version_str])) def get_compiler_command(self, version=None): if version is None: @@ -87,6 +105,14 @@ class MsvcRegistry(object): return self._is_vs2017_toolset_dict[version_str] return False + def is_vs2019_toolset(self, version): + if not self.is_version_installed(version): + return False + version_str = ver.version_tuple_to_str(version) + if version_str in self._is_vs2019_toolset_dict: + return self._is_vs2019_toolset_dict[version_str] + return False + def _do_inventory(self): for version in self._supported_msvc_versions: if version not in self._msvc_install_dir_dict: @@ -98,13 +124,18 @@ class MsvcRegistry(object): cl_cmd = self._find_cl_cmd(vc_dir, version) if cl_cmd: self._logger.debug("found VC compiler %s", cl_cmd) - if version in ['14.1']: + if version in ['14.2', '14.1']: setup_cmd = 
os.path.normpath(os.path.join(os.path.dirname(cl_cmd), '..', '..', '..', '..', '..', '..', 'Auxiliary', 'Build', 'vcvarsall.bat')) elif version in ['14.0']: if os.path.exists(os.path.join(vc_dir, '..', 'Common7', 'IDE', 'devenv.exe')): self._logger.debug("found VS 2015 IDE installed.") setup_cmd = os.path.join(vc_dir, 'vcvarsall.bat') - self._is_vs2017_toolset_dict[version] = False + elif '14.2' in self._compiler_command_dict: + # We've got 14.0 as an alternative VS 2019 toolset. + self._logger.debug("found msvc-14.0 installed as an alternative VS 2019 toolset.") + setup_cmd = self._compiler_command_dict['14.2'] + self._is_vs2019_toolset_dict[version] = True + self._compiler_command_option_dict[version] = '-vcvars_ver=14.0' elif '14.1' in self._compiler_command_dict: # We've got 14.0 as an alternative VS 2017 toolset. self._logger.debug("found msvc-14.0 installed as an alternative VS 2017 toolset.") @@ -119,6 +150,19 @@ class MsvcRegistry(object): cl_version = self._query_msvc_compiler_version(cl_cmd) self._compiler_command_dict[version] = setup_cmd self._compiler_version_dict[version] = cl_version + if (version == '14.2') and ('14.1' not in self._msvc_install_dir_dict): + # Search for alternative toolset vc141 installed with vs2019 + self._logger.debug("searching for alternative VS2019 toolset vc141.") + vc_dir = self._msvc_install_dir_dict[version][0] + setup_cmd = self._compiler_command_dict['14.2'] + cl_cmd = self._find_cl_cmd(vc_dir, '14.1') + if cl_cmd: + self._logger.debug("found alternative VC compiler {}".format(cl_cmd)) + cl_version = self._query_msvc_compiler_version(cl_cmd) + self._compiler_command_dict['14.1'] = setup_cmd + self._compiler_version_dict['14.1'] = cl_version + self._compiler_command_option_dict['14.1'] = '-vcvars_ver=14.1x' + self._is_vs2019_toolset_dict['14.1'] = True msvc_version_list = [] for version in self._compiler_version_dict: @@ -127,32 +171,36 @@ class MsvcRegistry(object): self._installed_msvc_versions = 
ver.version_list_sort(msvc_version_list) self._installed_msvc_versions.reverse() # print("sorted msvc versions: ", self._installed_msvc_versions) - for version in self._installed_msvc_versions: - version_str = ver.version_tuple_to_str(version) - if version_str in ['14.1']: - self._is_vs2017_toolset_dict[version_str] = True - elif version_str not in self._is_vs2017_toolset_dict: - self._is_vs2017_toolset_dict[version_str] = False def _find_cl_cmd(self, vc_inst_dir, version_str): cl_cmd = None - if version_str in ['14.1']: + if version_str in ['14.2', '14.1']: msvc_dir = os.path.join(vc_inst_dir, 'Tools', 'MSVC') if os.path.exists(msvc_dir): version_dir_list = [ver.version_tuple_from_str(x) for x in os.listdir(msvc_dir) if re.match(r'[0-9.]+$', x)] if version_dir_list: version_dir_list = ver.version_list_sort(version_dir_list) version_dir_list.reverse() + # VS2019 installs toolset v141 side-by-side in a folder named '14.16.27023', toolset v142 is + # installed in a folder named '14.20.27508'. 
for version in version_dir_list: + if (version_str == '14.2') and (version[1] >= 30): + self._logger.debug("ignoring cl installation folder: {}".format(os.path.join(msvc_dir, ver.version_tuple_to_str(version)))) + continue + if (version_str == '14.1') and (version[1] >= 20): + self._logger.debug("ignoring cl installation folder: {}".format(os.path.join(msvc_dir, ver.version_tuple_to_str(version)))) + continue cl_cmd = os.path.join(msvc_dir, ver.version_tuple_to_str(version), 'bin', 'HostX64', 'x64', 'cl.exe') if os.path.exists(cl_cmd): - return cl_cmd + break else: cl_cmd = None else: cl_cmd = os.path.join(vc_inst_dir, 'bin', 'amd64', 'cl.exe') if not os.path.exists(cl_cmd): cl_cmd = None + if cl_cmd: + self._logger.debug("found cl: {}".format(cl_cmd)) return cl_cmd def _query_msvc_compiler_version(self, cl_cmd): @@ -208,39 +256,46 @@ class MsvcRegistry(object): continue return vc_install_dir_dict - def _do_inventory_vc141(self): - vswhere = self._find_vswhere() - vc_dir_fnd = False - if vswhere: - self._logger.debug("found VS2017 locator: %s", vswhere) - try: - vswhere_argv = [vswhere, '-latest'] - # vswhere_argv.extend(['-products', 'Enterprise']) - # vswhere_argv.extend(['-products', 'Professional']) - # vswhere_argv.extend(['-products', 'Community']) - vswhere_argv.extend(['-products', '*']) - vswhere_argv.extend(['-requires', 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64']) - vswhere_argv.extend(['-property', 'installationPath']) - vswhere_argv.extend(['-version', '[15.0,16.0)']) - retv = subprocess.check_output(vswhere_argv, universal_newlines=True).rstrip() - if retv != '': - self._logger.debug("VS2017 install path: %s", retv) - vc_dir = os.path.join(retv, 'VC') - if os.path.exists(vc_dir): - self._logger.debug("VS2017 VC install path: %s", vc_dir) - self._msvc_install_dir_dict['14.1'] = [vc_dir] - vc_dir_fnd = True - else: - self._logger.debug("VS2017 install path: <none>") - except subprocess.CalledProcessError: - self._logger.debug("VS2017 locator 
call failed for some reason.") + def _do_inventory_vc14x(self, msvc_version_str, vswhere=None): + if msvc_version_str == '14.2': + vswhere_version_expr = '[16.0,17.0)' + vs_alias_str = 'VS2019' + elif msvc_version_str == '14.1': + vswhere_version_expr = '[15.0,16.0)' + vs_alias_str = 'VS2017' + else: + assert False + if vswhere is None: + vswhere = self._find_vswhere() + if vswhere is None: + self._logger.debug("{0} locator vswhere.exe not found, {0} detection disabled.".format(vs_alias_str)) + return else: - self._logger.debug("VS2017 locator vswhere.exe not found, VS2017 detection disabled.") + self._logger.debug("found {} locator: {}".format(vs_alias_str, vswhere)) + vc_dir_fnd = False + try: + vswhere_argv = [vswhere, '-latest'] + # vswhere_argv.extend(['-products', 'Enterprise']) + # vswhere_argv.extend(['-products', 'Professional']) + # vswhere_argv.extend(['-products', 'Community']) + vswhere_argv.extend(['-products', '*']) + vswhere_argv.extend(['-requires', 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64']) + vswhere_argv.extend(['-property', 'installationPath']) + vswhere_argv.extend(['-version', vswhere_version_expr]) + retv = subprocess.check_output(vswhere_argv, universal_newlines=True).rstrip() + if retv != '': + self._logger.debug("{} install path: {}".format(vs_alias_str, retv)) + vc_dir = os.path.join(retv, 'VC') + if os.path.exists(vc_dir): + self._logger.debug("{} VC install path: {}".format(vs_alias_str, vc_dir)) + self._msvc_install_dir_dict[msvc_version_str] = [vc_dir] + vc_dir_fnd = True + else: + self._logger.debug("{} install path: <none>".format(vs_alias_str)) + except subprocess.CalledProcessError: + self._logger.debug("{} vswhere locator call failed for some reason.".format(vs_alias_str)) if not vc_dir_fnd: - self._logger.debug("VS2017 VC not found, VS2017 detection disabled.") - # Disable VS2017 detection by path? 
- if '14.1' in self._msvc_install_dir_dict: - self._msvc_install_dir_dict.pop('14.1') + self._logger.debug("{0} VC not found, {0} detection disabled.".format(vs_alias_str)) def _find_vswhere(self): vswhere_prog = None @@ -264,68 +319,6 @@ class MsvcRegistry(object): return getattr(MsvcRegistry.instance, item) -class BjamToolset(object): - def __init__(self, sys_info, bb_version=None): - self._logger = logging.getLogger(__name__) - self._bjam_toolset_build_script = None - - if sys_info.is_linux(): - self._bjam_toolset = 'gcc' - elif sys_info.is_macosx(): - self._bjam_toolset = 'darwin' - elif sys_info.is_windows(): - # On windows the Boost.Build version is required as vsXXXX is only supported by Boost.Build x.y.z or higher. - assert bb_version is not None - self._msvc_registry = MsvcRegistry() - - # We have to constrain the search for the latest msvc because the bjam build scripts must have support for it. - # e.g. vc14 works only for 1.59.0 or higher. - if ver.version_compare(bb_version, (1, 50, 0)) < 0: - max_msvc_version = (10, 0) - elif ver.version_compare(bb_version, (1, 55, 0)) < 0: - max_msvc_version = (11, 0) - elif ver.version_compare(bb_version, (1, 59, 0)) < 0: - max_msvc_version = (12, 0) - elif ver.version_compare(bb_version, (1, 64, 0)) < 0: - max_msvc_version = (14, 0) - elif ver.version_compare(bb_version, (1, 64, 0)) == 0: - # This is Boost.Build 1.64.0 or higher. - # Since msvc-14.1 is supported by 1.64.0 but building bjam requires a suitable VC command prompt or patched - # bjam build scripts, a previous msvc-x.y is preferred. - max_msvc_version = None - for v in ['14.0', '12.0', '11.0', '10.0']: - if self._msvc_registry.is_version_installed(ver.version_tuple_from_str(v)): - max_msvc_version = ver.version_tuple_from_str(v) - break - else: - # Allow vc141 for 1.65.0 and higher by default, the toolset detection vswhere is now available. 
- max_msvc_version = None - - # mingw does not work the same way as msvc when launching helper scripts or programs and - # is not used to build bjam/b2. - msvc_version = self._msvc_registry.get_latest_version(max_msvc_version) - msvc_version_str = ver.version_tuple_to_str(msvc_version) - self._bjam_toolset = 'msvc-' + msvc_version_str - - # map the msvc toolset spec into a string supported by the bjam build script build.bat - if msvc_version[1] > 0: - # vc141 - self._bjam_toolset_build_script = 'vc' + str(msvc_version[0])+ str(msvc_version[1]) - else: - # vc14, vc12, vc11, vc10 - self._bjam_toolset_build_script = 'vc' + str(msvc_version[0]) - else: - raise Exception('Unknown platform detected, please contact technical support.') - if self._bjam_toolset_build_script is None: - self._bjam_toolset_build_script = self._bjam_toolset - - def get_bjam_toolset(self, build_script_format=False): - if build_script_format: - return self._bjam_toolset_build_script - else: - return self._bjam_toolset - - class Toolset(object): class PlatformInfo(object): @@ -463,6 +456,8 @@ class Toolset(object): if self._toolset.startswith('msvc'): if self._msvc_registry.is_vs2017_toolset(self._version): s += "VS 2017 toolset!\n" + if self._msvc_registry.is_vs2019_toolset(self._version): + s += "VS 2019 toolset!\n" s += "platform(s):\n" for platform_info in self._platform_info: diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py b/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py index d065c13a0878ef863fceb52428fd41791ee74447..ea821ef1046d573193c3c6ef4e0843924c998651 100755 --- a/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py +++ b/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py @@ -4,4 +4,4 @@ # Any manual changes here will be overridden by the next build. 
#------------- -CMAKE_BUILD_VERSION_STR = "3.13.0.2" +CMAKE_BUILD_VERSION_STR = "3.14.4.4" diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/system.py b/cmake/CMakeBuild/bin/pyhhi/build/common/system.py index 69b02ea583eccf03b5874e87fad9041468c27a9e..b51f0fc6c2d2826e1f2d988592c130d36013d6db 100755 --- a/cmake/CMakeBuild/bin/pyhhi/build/common/system.py +++ b/cmake/CMakeBuild/bin/pyhhi/build/common/system.py @@ -98,18 +98,18 @@ class SystemInfo(object): if self._os_arch == 'x86_64': if self._python_arch == 'x86': - self._program_dir = os.getenv('PROGRAMW6432') - self._programx86_dir = os.getenv('PROGRAMFILES') + self._program_dir = os.path.normpath(os.getenv('PROGRAMW6432')) + self._programx86_dir = os.path.normpath(os.getenv('PROGRAMFILES')) else: - self._program_dir = os.getenv('PROGRAMFILES') - self._programx86_dir = os.getenv('PROGRAMFILES(X86)') + self._program_dir = os.path.normpath(os.getenv('PROGRAMFILES')) + self._programx86_dir = os.path.normpath(os.getenv('PROGRAMFILES(X86)')) assert self._programx86_dir is not None elif self._os_arch == 'x86': - self._program_dir = os.getenv('PROGRAMFILES') + self._program_dir = os.path.normpath(os.getenv('PROGRAMFILES')) else: assert False assert self._program_dir is not None - self._program_data_dir = os.getenv('PROGRAMDATA') + self._program_data_dir = os.path.normpath(os.getenv('PROGRAMDATA')) if self._windows_msys: pass @@ -315,8 +315,12 @@ class SystemInfo(object): def get_path(self): return self._search_path - def get_home_dir(self): - return self._home_dir + def get_home_dir(self, native=False): + if self.is_windows_msys() and native: + home_dir = os.path.normpath(os.path.expandvars('$USERPROFILE')) + else: + home_dir = self._home_dir + return home_dir def get_default_proj_home_dir(self): return self._default_proj_home_dir @@ -339,7 +343,7 @@ class SystemInfo(object): def get_short_path(self, fpath): if self.is_windows(): - fpath = self.get_short_path_win(fpath) + fpath = 
os.path.normpath(self.get_short_path_win(fpath)) return fpath def get_short_path_win(self, fpath): @@ -440,13 +444,13 @@ class SystemInfo(object): # make sure the user's home directory exists if not os.path.exists(home_dir): raise Exception('home directory "' + home_dir + '" does not exist.') - self._home_dir = home_dir + self._home_dir = os.path.normpath(home_dir) def _query_default_proj_home_dir(self): if 'PROJ_HOME' in os.environ: - proj_home_dir = os.path.expandvars('$PROJ_HOME') + proj_home_dir = os.path.normpath(os.path.expandvars('$PROJ_HOME')) else: - proj_home_dir = os.path.join(self.get_home_dir(), 'projects') + proj_home_dir = os.path.join(self.get_home_dir(native=True), 'projects') if os.path.exists(proj_home_dir): self._default_proj_home_dir = proj_home_dir else: @@ -459,7 +463,8 @@ class SystemInfo(object): self._search_path.append(util.normalize_path(dir)) def _query_desktop_dir(self): - home_dir = self.get_home_dir() + # MSYS has its own environment but Desktop comes from the native windows home. + home_dir = self.get_home_dir(native=True) desktop_dir = os.path.join(home_dir, 'Desktop') if os.path.exists(desktop_dir): self._desktop_dir = desktop_dir diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake index 202c7e309cea573fcd40af14889ffbbff742eff5..07532ba3ef188edb7a1f52d08434ff82341b0eab 100644 --- a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake +++ b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake @@ -16,34 +16,30 @@ Unless explicitly disabled by configuration option ``BBuildEnv_EXCLUDE_MODULES`` module ``BBuildEnv`` loads the following submodules to provide additional support for Boost, Qt5, OpenCV, file downloads, MinGW and CPack: -- :module:`BBuildEnvAddProject` provides macros and functions to add standard - subproject like console applications, libraries, samples, UTF tests and Qt - applications to a standard workspace. 
-- :module:`BBuildEnvBoost` adds a few utility functions and macros helping to - use locally built Boost libraries. -- :module:`BBuildEnvOpenCV` -- :module:`BBuildEnvQt5` -- :module:`BBuildEnvDownload` -- :module:`BBuildEnvVersionUtil` -- :module:`BBuildEnvCPack` adds a few utility functions helping to create binary - distribution packages. -- :module:`BBuildEnvMingw` adds helper functions to copy MinGW runtime DLLs. - - -Reserved Identifiers -^^^^^^^^^^^^^^^^^^^^ - -Avoiding name clashes in CMakeLists.txt or project specific CMake files all -projects including module ``BBuildEnv``, or any of its submodules, are advised -not to use CMake variables, functions or macros starting with:: - - BBuildEnv, _BBuildEnv, _bb_, bb_, BB_, _BB_ - -Users may use variables starting with ``BBuildEnv_<var>`` only to configure the -behavior of ``BuildEnv`` modules or submodules or evaluate properties of loaded -``BuildEnv`` modules or submodules exposed through documented variables -``BBuildEnv_<var>``. - +============================== =========================================================== +Module Description +============================== =========================================================== +:module:`BBuildEnvAddProject` Macros and functions to add standard + subproject like console applications, libraries, samples, + UTF tests and Qt applications to a standard workspace +:module:`BBuildEnvGitSvn` Utility functions to support Git to SVN interoperability. +:module:`BBuildEnvDownload` Supports HTTPS downloads of single files at build time +:module:`BBuildEnvVersionUtil` Functions to parse version header files +:module:`BBuildEnvCPack` Functions helping to create binary distribution packages +:module:`BBuildEnvBoost` Macros and functions helping to use locally built Boost + libraries. 
+:module:`BBuildEnvOpenCV` Helper functions to copy OpenCV runtime DLLs +:module:`BBuildEnvQt5` Helper functions to copy Qt5 runtime DLLs +:module:`BBuildEnvMingw` Helper functions to copy MinGW runtime DLLs on Ubuntu +============================== =========================================================== + +The following modules are not loaded by default as they provide functionality not needed +by all main projects. + +- :module:`BBuildEnvGit` provides macros and functions to checkout Git repositories at + configuration time to aggregate them into a single build tree. Similar functionality + is provided by module :module:`FetchContent` with slightly different Git clone and + update behavior. Configuration Options ^^^^^^^^^^^^^^^^^^^^^ @@ -51,12 +47,19 @@ Configuration Options This module evaluates the following variables at load time allowing users to customize its behavior: -``BBuildEnv_DEBUG`` - Enable debugging messages. ``BBuildEnv_EXCLUDE_MODULES`` List of submodules to be excluded from loading. Use ``ALL`` to disable loading any submodule. +``BBuildEnv_DEBUG`` + Enable debugging messages. + +``BBuildEnv_USE_LIBRARY_NAME_POSTFIX`` + A boolean variable to enable a configuration specific library name postfix which + allows to install all library or executable variants in the same directory. It's unset/off by + default to provide backward compatibility with earlier releases. If enabled executable + targets will need a configuration postfix as well. + How to Use ^^^^^^^^^^ @@ -99,12 +102,48 @@ as an svn:external or as a versioned Git subtree: Provided Variables ^^^^^^^^^^^^^^^^^^ +Module ``BBuildEnv`` provides the following output variables +which are supposed to be treated readonly: + ``BBuildEnv_VERSION`` Module's version in decimal dotted format with a maximum of four components. - + ``BBuildEnv_MSYS`` Set to true when using MSYS. +``BBuildEnv_GENERATOR_ALIAS`` + CMake generator specific build directory. It's a plain name without any path separators. 
+ + ``umake`` + Unix Makefiles + + ``vs16`` + Microsoft Visual Studio 2019 + + ``vs15`` + Microsoft Visual Studio 2017 + + ``vs14`` + Microsoft Visual Studio 2015 + + ``xcode`` + Xcode generator, switching between different Xcode versions is currently not supported within a single build tree. + + ``ninja`` + Ninja generator + +``BBuildEnv_<CONFIG>_POSTFIX`` + Configuration specific postfix strings to support side-by-side installation in the same + directory. + +``BBuildEnv_SHARED_DIR_POSTFIX`` + A string specific to the shared library configuration to allow for single + output directories or installation directories. + +``BBuildEnv_OUTPUT_DIR_SUFFIX`` + A generator specific relative path to be used in installation rules to support multiple + generators or compiler versions in combination with the same installation prefix. + ``BBuildEnv_ROOT_DIR`` Optional root directory of CMakeBuild customization files. @@ -151,6 +190,40 @@ Provided Functions and Macros Compiler specific flag to enable or disable a warning. +.. command:: bb_add_subdirectory + + The ``bb_add_subdirectory()`` macro adds an external in-tree Git subproject + provided variable ``USE_GIT_SUBPROJECTS`` is ON. + The macro silently assumes the subproject is checked out to + ``${CMAKE_SOURCE_DIR}/ext/<subproject>``. If variable ``USE_GIT_SUBPROJECTS`` + is OFF, the macro will invoke :command:`add_subdirectory` for backward compatibility + with SVN repositories and subproject aggregation via SVN externals:: + + bb_add_subdirectory(<subproject>) + + **Parameters:** + + ``subproject`` + A relative path to an in-tree subproject; e.g. ``BoostAddon/src/lib/LoggerLib`` + + +.. command:: bb_set_target_output_name + + The ``bb_set_target_output_name`` macro appends a configuration specific postfix to + the output name of executable targets if variable ``BBuildEnv_USE_LIBRARY_NAME_POSTFIX`` + is ON. 
If applied to library targets, it will change :prop_tgt:`COMPILE_PDB_NAME_<CONFIG>` + for static libraries to align the PDB filename with the library filename. + CMake's postfix machinery does it for linker generated PDB files but not for compiler + generated PDB files:: + + bb_set_target_output_name( <target> ) + + **Parameters:** + + ``target`` + An existing target to be modified. + + .. command:: bb_set_external_dir The ``bb_set_external_dir()`` function searches for a directory given a @@ -158,15 +231,15 @@ Provided Functions and Macros a shared folder holding an external project without searching any system paths or cross compiler specific paths:: - bb_set_external_dir(<abs_path> <relative_path> [<OPTIONAL>]) + bb_set_external_dir(<abs_path> <dir> [<OPTIONAL>]) **Parameters:** ``abs_path`` Absolute path to ``relative_path`` found in one of the default locations. - ``relative_path`` - Path to search for in one of the default locations. An absolute path + ``dir`` + Directory to search for in one of the default locations. An absolute path will be returned as-is. ``OPTIONAL`` @@ -184,25 +257,26 @@ Provided Functions and Macros ``${CMAKE_SOURCE_DIR}/../..`` ``$ENV{HOME}/projects`` - Ignored on windows host systems. + Ignored on native windows host systems. It is searched when MSYS has been detected + or any other non-windows platform. ``$ENV{USERPROFILE}/projects`` Ignored on non-windows host systems. -.. command:: bb_add_subdirectory +Reserved Identifiers +^^^^^^^^^^^^^^^^^^^^ - The ``bb_add_subdirectory()`` macro adds an external in-tree Git subproject. - The macro silently assumes the subproject is checked out to - ``${CMAKE_SOURCE_DIR}/ext/<subproject>``:: - - bb_add_subdirectory(<subproject>) - - **Parameters:** - - ``subproject`` - A relative path to an in-tree subproject; e.g. 
``BoostAddon/src/lib/LoggerLib`` +Avoiding name clashes in CMakeLists.txt or project specific CMake files all +projects including module ``BBuildEnv``, or any of its submodules, are advised +not to use CMake variables, functions or macros starting with:: + + BBuildEnv, _BBuildEnv, _bb_, bb_, BB_, _BB_ +Users may use variables starting with ``BBuildEnv_<var>`` only to configure the +behavior of ``BuildEnv`` modules or submodules or evaluate properties of loaded +``BuildEnv`` modules or submodules exposed through documented variables +``BBuildEnv_<var>``. #]===] @@ -224,6 +298,7 @@ set( _BBuildEnvSubmoduleList BBuildEnvBoost BBuildEnvQt5 BBuildEnvOpenCV + BBuildEnvGitSvn ) foreach( _cmod IN LISTS _BBuildEnvSubmoduleList ) @@ -251,6 +326,21 @@ macro( bb_add_subdirectory subdirectory_ ) endmacro() +macro( bb_set_target_output_name target_ ) + if( BBuildEnv_USE_LIBRARY_NAME_POSTFIX ) + get_target_property( _bb_tmp_target_type ${target_} TYPE ) + if( _bb_tmp_target_type STREQUAL "EXECUTABLE" ) + set_target_properties( ${target_} PROPERTIES OUTPUT_NAME_DEBUG ${target_}${CMAKE_DEBUG_POSTFIX} + OUTPUT_NAME_RELWITHDEBINFO ${target_}${CMAKE_RELWITHDEBINFO_POSTFIX} + OUTPUT_NAME_MINSIZEREL ${target_}${CMAKE_MINSIZEREL_POSTFIX} ) + elseif( MSVC AND (_bb_tmp_target_type STREQUAL "STATIC_LIBRARY" ) ) + # message( STATUS "${target_} is static, setting COMPILE_PDB_NAME_DEBUG ..." ) + set_target_properties( ${target_} PROPERTIES COMPILE_PDB_NAME_DEBUG ${target_}${CMAKE_DEBUG_POSTFIX} COMPILE_PDB_NAME_RELWITHDEBINFO ${target_}${CMAKE_RELWITHDEBINFO_POSTFIX} ) + endif() + endif() +endmacro() + + macro( bb_save_find_context fnd_ctx ) if( CMAKE_CROSSCOMPILING ) # find_package must be told not to expect the BOOST libraries inside "CMAKE_FIND_ROOT_PATH". @@ -291,14 +381,38 @@ macro( _bb_get_cxx_compiler_version_major_minor version_major_minor_ ) string( REGEX REPLACE "([0-9]+)\\.([0-9]+)([0-9.]+)?" 
"\\1.\\2" ${version_major_minor_} ${CMAKE_CXX_COMPILER_VERSION} ) endmacro() -macro( bb_get_home_dir home_dir_ ) + +function( bb_get_home_dir home_dir_ ) + set( _native FALSE ) + + if( ARGC EQUAL 2 ) + if( ${ARGV1} STREQUAL "NATIVE" ) + set( _native TRUE ) + else() + message( FATAL_ERROR "bb_get_home_dir: argument ${ARGV1} not understood." ) + endif() + elseif( ARGC GREATER 2 ) + message( FATAL_ERROR "bb_get_home_dir: too many arguments specified, expected <home_dir> [NATIVE]." ) + endif() + if( CMAKE_HOST_WIN32 ) # Force forward slashes on Windows - file( TO_CMAKE_PATH "$ENV{USERPROFILE}" ${home_dir_} ) + if( BBuildEnv_MSYS ) + if( _native ) + file( TO_CMAKE_PATH "$ENV{USERPROFILE}" _home_dir ) + else() + file( TO_CMAKE_PATH "$ENV{HOME}" _home_dir ) + endif() + else() + file( TO_CMAKE_PATH "$ENV{USERPROFILE}" _home_dir ) + endif() else() - set( ${home_dir_} "$ENV{HOME}" ) + set( _home_dir "$ENV{HOME}" ) endif() -endmacro() + + set( ${home_dir_} "${_home_dir}" PARENT_SCOPE ) + +endfunction() macro( bb_set_home_dir home_dir_ ) @@ -601,10 +715,14 @@ function( _bb_find_proj_home proj_home_ home_dir_ ) points to a non-existing directory. " ) endif() - elseif( IS_DIRECTORY "${home_dir_}/projects" ) + elseif( EXISTS "${home_dir_}/projects" ) set( _proj_home "${home_dir_}/projects" ) - #else() - # get_filename_component( _proj_home ${CMAKE_SOURCE_DIR}/.. REALPATH ) + elseif( BBuildEnv_MSYS ) + # Check for %USERPROFILE%/projects as a fallback when using MSYS. 
+ bb_get_home_dir( _home_dir NATIVE ) + if( EXISTS "${_home_dir}/projects" ) + set( _proj_home "${_home_dir}/projects" ) + endif() endif() if( DEFINED _proj_home ) set( ${proj_home_} "${_proj_home}" PARENT_SCOPE ) @@ -647,8 +765,15 @@ function( bb_set_external_dir dir_var_ dir_ ) list( APPEND _search_path "${_dir_norm}" ) endif() endforeach() - if( EXISTS "${bb_home_dir}/projects" ) - list( APPEND _search_path "${bb_home_dir}/projects" ) + bb_get_home_dir( _home_dir ) + if( EXISTS "${_home_dir}/projects" ) + list( APPEND _search_path "${_home_dir}/projects" ) + endif() + if( BBuildEnv_MSYS ) + bb_get_home_dir( _home_dir NATIVE ) + if( EXISTS "${_home_dir}/projects" ) + list( APPEND _search_path "${_home_dir}/projects" ) + endif() endif() list( REMOVE_DUPLICATES _search_path ) foreach( _path IN LISTS _search_path ) @@ -1132,18 +1257,6 @@ macro( bb_build_env_setup ) _bb_find_proj_home( bb_proj_home "${bb_home_dir}" ) # Add a cmake generator alias - # -- - # Visual Studio 15 2017 - # Visual Studio 14 2015 - # Visual Studio 12 2013 - # Visual Studio 11 2012 - # Visual Studio 10 2010 - # -- - # Xcode - # Unix Makefiles - # Ninja - # MinGW Makefiles - # -- unset( bb_generator_alias ) if( CMAKE_GENERATOR STREQUAL "Unix Makefiles" ) set( bb_generator_alias "umake" ) @@ -1169,11 +1282,15 @@ macro( bb_build_env_setup ) # set standard output directories: gcc-5.4/x86_64 if( DEFINED bb_generator_alias ) + set( BBuildEnv_GENERATOR_ALIAS "${bb_generator_alias}" ) set( bb_default_output_dir "${bb_generator_alias}/${bb_toolset_subdir}/${bb_platform_dir}" ) else() set( bb_default_output_dir "${bb_toolset_subdir}/${bb_platform_dir}" ) endif() + # BBuildEnv_OUTPUT_DIR_SUFFIX could be a cache variable to make it customizable. + set( BBuildEnv_OUTPUT_DIR_SUFFIX "${bb_default_output_dir}" ) + # the deploy folder may be used to save installer packages. 
set( bb_deploy_dir "${CMAKE_SOURCE_DIR}/deploy" ) @@ -1182,23 +1299,63 @@ macro( bb_build_env_setup ) endif() if( BUILD_SHARED_LIBS ) - set( _bb_shared_suffix "-shared" ) + set( BBuildEnv_SHARED_DIR_POSTFIX "-shared" ) + else() + unset( BBuildEnv_SHARED_DIR_POSTFIX ) + endif() + + if( NOT DEFINED BBuildEnv_USE_LIBRARY_NAME_POSTFIX ) + set( BBuildEnv_USE_LIBRARY_NAME_POSTFIX OFF CACHE BOOL "Enable library name postfix" ) endif() + + #set( BBuildEnv_RELEASE_POSTFIX "" ) + set( BBuildEnv_DEBUG_POSTFIX "-d" ) + set( BBuildEnv_RELWITHDEBINFO_POSTFIX "-rd" ) + set( BBuildEnv_MINSIZEREL_POSTFIX "-mr" ) - set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/debug${_bb_shared_suffix}" ) - set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/release${_bb_shared_suffix}" ) - set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" ) - set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" ) - set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/debug${_bb_shared_suffix}" ) - set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/release${_bb_shared_suffix}" ) - set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" ) - set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" ) + if( BBuildEnv_USE_LIBRARY_NAME_POSTFIX ) + set( CMAKE_DEBUG_POSTFIX ${BBuildEnv_DEBUG_POSTFIX} ) + set( CMAKE_RELWITHDEBINFO_POSTFIX ${BBuildEnv_RELWITHDEBINFO_POSTFIX} ) + set( CMAKE_MINSIZEREL_POSTFIX ${BBuildEnv_MINSIZEREL_POSTFIX} ) + + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY 
"${CMAKE_SOURCE_DIR}/lib${BBuildEnv_SHARED_DIR_POSTFIX}/${BBuildEnv_OUTPUT_DIR_SUFFIX}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" ) + + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL}" ) + + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bin${BBuildEnv_SHARED_DIR_POSTFIX}/${BBuildEnv_OUTPUT_DIR_SUFFIX}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) + + else() + # Using CMake's default library name convention which is the same for all configurations. 
+ set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/debug${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/release${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/relwithdebinfo${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/minsizerel${BBuildEnv_SHARED_DIR_POSTFIX}" ) + + + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO}" ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL}" ) + + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/debug${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/release${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/relwithdebinfo${BBuildEnv_SHARED_DIR_POSTFIX}" ) + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/minsizerel${BBuildEnv_SHARED_DIR_POSTFIX}" ) + endif() + - set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/debug${_bb_shared_suffix}" ) - set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/release${_bb_shared_suffix}" ) - set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" ) - set( 
CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" ) endmacro( bb_build_env_setup ) #message( STATUS "BBuildEnv.cmake: starting: ${CMAKE_GENERATOR}" ) diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake index 40de4dad5071bdb71e889d7408a789e43bd07585..3f195eb40ab0e4899bb1b9dbef49f4d4e5b6eaef 100644 --- a/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake +++ b/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake @@ -60,6 +60,8 @@ function( bb_dump_cmake_system_info ) message( STATUS "CMAKE_PROGRAM_PATH: ${CMAKE_PROGRAM_PATH}" ) message( STATUS "CMAKE_SYSTEM_PROGRAM_PATH: ${CMAKE_SYSTEM_PROGRAM_PATH}" ) message( STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}" ) + message( STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}" ) + if( CMAKE_CXX_COMPILER_LOADED ) message( STATUS "CMAKE_CXX_COMPILER_LOADED: on" ) endif() @@ -73,6 +75,7 @@ function( bb_dump_cmake_system_info ) message( STATUS "CMAKE_CXX_COMPILER_ARCHITECTURE_ID: ${CMAKE_CXX_COMPILER_ARCHITECTURE_ID}" ) endif() if( MSVC ) + message( STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}" ) message( STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}" ) message( STATUS "CMAKE_VS_PLATFORM_TOOLSET: ${CMAKE_VS_PLATFORM_TOOLSET}" ) message( STATUS "MSVC_VERSION: ${MSVC_VERSION}" ) @@ -119,6 +122,10 @@ endfunction( bb_dump_cmake_system_info ) function( bb_dump_target_properties target_ prop1_ ) + if( NOT TARGET ${target_} ) + message( WARNING "target ${target_} does not exist." 
) + return() + endif() set( _prop_list ${prop1_} ) list( APPEND _prop_list ${ARGN} ) list( LENGTH _prop_list _prop_list_len ) @@ -126,9 +133,12 @@ function( bb_dump_target_properties target_ prop1_ ) foreach( _prop ${_prop_list} ) get_target_property( _prop_value ${target_} ${_prop} ) if( _prop_value ) - message( STATUS "bb_dump_target_properties: ${target_}: ${_prop}=${_prop_value}" ) + message( STATUS "${target_}: ${_prop}=${_prop_value}" ) endif() endforeach() message( STATUS "bb_dump_target_properties: leaving" ) endfunction() +macro( bb_dump_imported_target_properties target_ ) + bb_dump_target_properties( ${target_} IMPORTED_LOCATION IMPORTED_IMPLIB INTERFACE_INCLUDE_DIRECTORIES INTERFACE_COMPILE_DEFINITIONS INTERFACE_LINK_LIBRARIES ) +endmacro() diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake index b4543a8d442aaf20971ec34d2c6bac56f587d6ee..2f5b5ea5b5c9240d99b8b3f764375336438a9db3 100644 --- a/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake +++ b/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake @@ -5,9 +5,9 @@ # set( BBuildEnv_VERSION_MAJOR 3 ) -set( BBuildEnv_VERSION_MINOR 13 ) -set( BBuildEnv_VERSION_PATCH 0 ) -set( BBuildEnv_VERSION_TWEAK 2 ) +set( BBuildEnv_VERSION_MINOR 14 ) +set( BBuildEnv_VERSION_PATCH 4 ) +set( BBuildEnv_VERSION_TWEAK 4 ) # BBuildEnv version in decimal dotted format as supported by CMake's version compare operations. 
set( BBuildEnv_VERSION "${BBuildEnv_VERSION_MAJOR}.${BBuildEnv_VERSION_MINOR}.${BBuildEnv_VERSION_PATCH}.${BBuildEnv_VERSION_TWEAK}" ) diff --git a/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake b/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake index 9c0ec5c3f62bc2d64d4a18b4adfc2abe63a818d3..3e78dc0815936ddf8c8ada05564b1f51c7110d37 100644 --- a/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake +++ b/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake @@ -19,7 +19,12 @@ list( APPEND CMAKE_FIND_ROOT_PATH ${ARM_LINUX_SYSROOT} ) # search headers and libraries in the target environment, search # programs in the host environment set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) -set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) + +# Ubuntu/amd64 + foreign architecture arm64 +set( CMAKE_LIBRARY_PATH /usr/lib/${GNU_MACHINE}-linux-gnu ) +set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH ) +#set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) + set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) set( USE_OPENCV_TOOLCHAIN_FLAGS ON ) diff --git a/doc/jvetdoc.cls b/doc/jvetdoc.cls index bdc22890e30fb89bfc9ac4a60b0b17d920c2af7b..f766488489577b40d4f1401611e89fe51e994dcb 100644 --- a/doc/jvetdoc.cls +++ b/doc/jvetdoc.cls @@ -117,7 +117,7 @@ \@strutb \it Title: & \@title \\ \@strutb \it Status: & \@jvetdocstatus \\ \@strutb \it Purpose: & \@jvetdocpurpose \\ - \@strutb \it Author(s): & % + \@strutb \it Editors: & % \setcounter{jvet@author@column}{0} \let\@and\\ \renewcommand{\and}{\@and\setcounter{jvet@author@column}{0}} diff --git a/doc/mainpage.h b/doc/mainpage.h index efe90264a414e5bca1df89df715511d12fd8c854..69186d6f62bbf41661e8bee5da088368b6f5ef98 100644 --- a/doc/mainpage.h +++ b/doc/mainpage.h @@ -24,7 +24,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/doc/software-manual.pdf b/doc/software-manual.pdf index 34b144cc8b9a980a393cc3c1a4a8bba40a247126..d882b72ad3d0098f52a4e4eaf55fefd37a4f725c 100644 Binary files a/doc/software-manual.pdf and b/doc/software-manual.pdf differ diff --git a/doc/software-manual.tex b/doc/software-manual.tex index e816dbe34f6722ddf22932558b173a04cfbcabca..582dcbe4f6685daedad9b725df548eabd84be4b5 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -184,7 +184,6 @@ fontsize=\footnotesize} \email{frank@bossentech.com} \and David Flynn - \email{dflynn@blackberry.com} \and Xiang Li \email{xlxiangli@tencent.com} @@ -206,7 +205,7 @@ fontsize=\footnotesize} \maketitle \begin{abstract} This document is a user manual describing usage of the VTM reference software -for the VVC project. It applies to version 4.0 of the software. +for the VVC project. It applies to version 7.1 of the software. \end{abstract} \tableofcontents @@ -258,8 +257,8 @@ be available in older compilers. \thead{Compiler environment} & \thead{Versions} \\ \hline -MS Visual Studio & 2015 and 2017 \\ -GCC & 5.4 and 7.3 \\ +MS Visual Studio & 2017 and 2019 \\ +GCC & 5.4, 7.3 and 8.3\\ Xcode/clang & latest \\ \hline \end{tabular} @@ -381,7 +380,7 @@ pacman -S --needed base-devel mingw-w64-i686-toolchain mingw-w64-x86_64-toolchai \section{Using the encoder} \begin{minted}{bash} -TAppEncoder [--help] [-c config.cfg] [--parameter=value] +EncoderApp [--help] [-li -c config.cfg] [-li --parameter=value] \end{minted} \begin{table}[ht] @@ -393,6 +392,7 @@ TAppEncoder [--help] [-c config.cfg] [--parameter=value] \thead{Description} \\ \hline \texttt{--help} & Prints parameter usage. \\ +\texttt{-li} & Applies to its next config file or command line parameter only to define i-th layer encoding option. 
If empty, the configuration file applies to all layers\\ \texttt{-c} & Defines configuration file to use. Multiple configuration files may be used with repeated --c options. \\ \texttt{--}\emph{parameter}\texttt{=}\emph{value} @@ -467,68 +467,48 @@ predict from a frame with a higher temporal id. If a frame with higher temporal IDs is listed among a frame's reference pictures, it is not used, but is kept for possible use in future frames. -\item[]\textbf{num_ref_pics_active}: Size of reference picture lists L0 -and L1, indicating how many reference pictures in each direction that -are used during coding. +\item[]\textbf{num_ref_pics_active_L0}: Number of reference pictures in list L0 +that are used during coding. -\item[]\textbf{num_ref_pics}: The number of reference pictures kept for -this frame. This includes pictures that are used for reference for the +\item[]\textbf{num_ref_pics_L0}: Size of reference picture list L0. +This includes pictures that are used for reference for the current picture as well as pictures that will be used for reference in the future. -\item[]\textbf{reference_pictures}: A space-separated list of +\item[]\textbf{reference_pictures_L0}: A space-separated list of num_ref_pics integers, specifying the POC of the reference pictures kept, relative the POC of the current frame. The picture list shall be -ordered, first with negative numbers from largest to smallest, followed -by positive numbers from smallest to largest (e.g. \verb|-1 -3 -5 1 3|). -Note that any pictures not supplied in this list will be discarded and +ordered as their intended order in the L0. +Note that any pictures not supplied in this list and in the list of L1 will be discarded and therefore not available as reference pictures later. -\item[]\textbf{predict}: Defines the value of the syntax element -inter_ref_pic_set_prediction_flag.
A value of 0 indicates that the -reference picture set is encoded without inter RPS prediction and the -subsequent parameters deltaRIdx$-1$, deltaRPS, num_ref_idcs and -Reference_idcs are ignored and do not need to be present. A value of 1 -indicates that the reference picture set is encoded with inter -prediction RPS using the subsequent parameters deltaRIdx$-1$, deltaRPS, -num_ref_idcs and Reference_idcs in the line. A value of 2 indicates that -the reference picture set is encoded with inter RPS but only the -deltaRIdx$-1$ parameters is needed. The deltaRPS, num_ref_idcs and -Reference_idcs values are automatically derived by the encoder based on -the POC and refPic values of the current line and the RPS pointed to by -the deltaRIdx$-1$ parameters. - -\item[]\textbf{deltaRIdx$-1$}: The difference between the index of the -curent RPS and the predictor RPS minus 1. - -\item[]\textbf{deltaRPS}: The difference between the POC of the -predictor RPS and POC the current RPS. - -\item[]\textbf{num_ref_idcs}: The number of ref_idcs to encode for the -current RPS. The value is equal to the value of num_ref_pics of the -predictor RPS plus 1. - -\item[]\textbf{reference_idcs}: A space-separated list of num_ref_idcs -integers, specifying the ref idcs of the inter RPS prediction. The value -of ref_idcs may be 0, 1 or 2 indicating that the reference picture is a -reference picture used by the current picture, a reference picture used -for future picture or not a reference picture anymore, respectively. The -first num_ref_pics of ref_idcs correspond to the Reference pictures in -the predictor RPS. The last ref_idcs corresponds to the predictor -picture. -\end{itemize} +\item[]\textbf{num_ref_pics_active_L1}: Number of reference pictures in lists L1 +that are used during coding. + +\item[]\textbf{num_ref_pics_L1}: Size of reference picture list L1. 
+This includes pictures that are used for reference for the +current picture as well as pictures that will be used for reference in +the future. + +\item[]\textbf{reference_pictures_L1}: A space-separated list of +num_ref_pics integers, specifying the POC of the reference pictures +kept, relative to the POC of the current frame. The picture list shall be +ordered as their intended order in the L1. +Note that any pictures not supplied in this list and in the list of L0 will be discarded and +therefore not available as reference pictures later. For example, consider the coding structure of Figure~\ref{fig:gop-example}. This coding structure is of size 4. The pictures are listed in decoding order. Frame1 shall therefore describe picture with $\textrm{POC}=4$. It -references picture 0, and therefore has $-4$ as a reference picture. +references picture 0, and therefore has 4 as a reference picture. Similarly, Frame2 has a POC of 2, and since it references pictures 0 and -4, its reference pictures are listed as \verb|-2 2|. Frame3 is a special +4, its reference pictures are listed as \verb|2 -2|. Frame3 is a special case: even though it only references pictures with POC 0 and 2, it also needs to include the picture with POC 4, which must be kept in order to -be used as a reference picture in the future. The reference picture list -for Frame3 therefore becomes \verb|-1 1 3|. Frame4 has a POC of 3 and -its list of reference pictures is \verb|-1 1|. +be used as a reference picture in the future. Note that the picture with POC 4 can be +included in the L0 or L1. The reference picture list for Frame3 therefore becomes \verb|1 -1 -3|. +Frame4 has a POC of 3 and its list of reference pictures is \verb|1 -1|. +\end{itemize} \begin{figure}[h] \caption{A GOP structure} @@ -537,27 +517,6 @@ its list of reference pictures is \verb|-1 1|.
\includegraphics[width=0.7\textwidth]{figures/gop-structure-example} \end{figure} -Inter RPS prediction may be used for Frame2, Frame3 and Frame4, hence -the predict parameter is set to 1 for these frames. Frame2 uses Frame1 -as the predictor hence the deltaRIdx$-1$ is 0. Similarly for Frame3 and -Frame4 which use Frame2 and Frame3 as predictors, respectively. The -deltaRPS is equal to the POC of the predictor minus the POC of the -current picture, therefore the deltaRPS for Frame2 is $4 -2 = 2$, for -Frame3 is $2 - 1 = 1$ and for Frame4 is $1 - 3 = -2$. - -In Frame2, reference pictures with POC 0 and 2 are used, so the -reference idcs for Frame2 are \verb|1 1| indicating that the reference -picture, $-4$, in Frame1 is still a reference picture in Frame2 and -Frame1 is also a reference picture in Frame2. The reference idcs for -Frame3 are \verb|1 1 1|. The first and second “1â€s indicating that -the reference pictures "$-2$ $2$" in Frame2 are still reference pictures in -Frame3 and the last “1†indicating that Frame2 is also a reference -picture in Frame3. In Frame 4, the reference idcs are \verb|0 1 1 0|. -The first “0†indicates that the reference pictures “-1†in Frame 3 is -no longer a reference picture in Frame4. The next two “1â€s indicate that -the reference pictures “$1$ $3$†are now reference pictures of Frame4. -The final “0†indicates that Frame3 is not a reference picture. - In order to specify this to the encoder, the parameters in Table~\ref{tab:gop-example} could be used. @@ -574,25 +533,23 @@ Table~\ref{tab:gop-example} could be used. 
\thead{Frame3} & \thead{Frame4} \\ \hline -Type & P & B & B & B \\ -POC & 4 & 2 & 1 & 3 \\ -QPOffset & 1 & 2 & 3 & 3 \\ -QPOffsetModelOff & 0.0 & 0.0 & 0.0 & 0.0 \\ -QPOffsetModelScale & 0.0 & 0.0 & 0.0 & 0.0 \\ -SliceCbQPOffset & 0 & 0 & 0 & 0 \\ -SliceCrQPOffset & 0 & 0 & 0 & 0 \\ -QPfactor & 0.5 & 0.5 & 0.5 & 0.5 \\ -tcOffsetDiv2 & 0 & 1 & 2 & 2 \\ -betaOffsetDiv2 & 0 & 0 & 0 & 0 \\ -temporal_id & 0 & 1 & 2 & 2 \\ -num_ref_pics_active & 1 & 1 & 1 & 1 \\ -num_ref_pics & 1 & 2 & 3 & 2 \\ -reference_pictures & $-$4 & $-$2 2 & $-$1 1 3 & $-$1 1 \\ -predict & 0 & 1 & 1 & 1 \\ -deltaRIdx$-$1 & & 0 & 0 & 0 \\ -deltaRPS & & 2 & 1 & $-$2 \\ -num_ref_idcs & & 2 & 3 & 4 \\ -reference_idcs & & 1 1 & 1 1 1 & 0 1 1 0 \\ +Type & P & B & B & B \\ +POC & 4 & 2 & 1 & 3 \\ +QPOffset & 1 & 2 & 3 & 3 \\ +QPOffsetModelOff & 0.0 & 0.0 & 0.0 & 0.0 \\ +QPOffsetModelScale & 0.0 & 0.0 & 0.0 & 0.0 \\ +SliceCbQPOffset & 0 & 0 & 0 & 0 \\ +SliceCrQPOffset & 0 & 0 & 0 & 0 \\ +QPfactor & 0.5 & 0.5 & 0.5 & 0.5 \\ +tcOffsetDiv2 & 0 & 1 & 2 & 2 \\ +betaOffsetDiv2 & 0 & 0 & 0 & 0 \\ +temporal_id & 0 & 1 & 2 & 2 \\ +num_ref_pics_active_L0 & 1 & 1 & 1 & 1 \\ +num_ref_pics_L0 & 1 & 1 & 1 & 1 \\ +reference_pictures_L0 & 4 & 2 & 1 & 1 \\ +num_ref_pics_active_L1 & 0 & 1 & 1 & 1 \\ +num_ref_pics_L1 & 0 & 1 & 2 & 1 \\ +reference_pictures_L1 & & $-$2 & $-$1 $-$3 & $-$1 \\ \hline \end{tabular} \end{table} @@ -605,51 +562,12 @@ line should contain information for one frame, so this configuration would be specified as: \begin{verbatim} -Frame1: P 4 1 0 0 0.5 0 0 0 1 1 -4 0 -Frame2: B 2 2 0 0 0.5 1 0 1 1 2 -2 2 1 0 2 2 1 1 -Frame3: B 1 3 0 0 0.5 2 0 2 1 3 -1 1 3 1 0 1 3 1 1 1 -Frame4: B 3 3 0 0 0.5 2 0 2 1 2 -1 1 1 0 -2 4 0 1 1 0 +Frame1: P 4 1 0 0 0.5 0 0 0 1 1 4 1 1 4 +Frame2: B 2 2 0 0 0.5 1 0 1 1 1 2 1 1 -2 +Frame3: B 1 3 0 0 0.5 2 0 2 1 1 1 1 2 -1 -3 +Frame4: B 3 3 0 0 0.5 2 0 2 1 1 1 1 1 -1 \end{verbatim} -The values of deltaRIdx$-1$, deltaRPS, num_ref_idcs and reference -idcs of Frame$K$ can be derived 
from the POC value of Frame$_K$ and -the POC, num_ref_pics and reference_pictures values of Frame$_M$, where -$K$ is the index of the RPS to be inter coded and the $M$ is the -index of the reference RPS, as follows. - -\setlength{\algomargin}{2em} -\begin{algorithm}[ht] -\SetKwData{deltaRIdx}{deltaRIdx} -\SetKwData{deltaRPS}{deltaRPS} -\SetKwData{numrefidcs}{num_ref_idcs} -\SetKwData{numrefpics}{num_ref_pics} -\SetKwData{referencepictures}{reference_pictures} -\SetKwData{referenceidcs}{reference_idcs} -\SetKwData{POC}{POC} - -$\deltaRIdx_K - 1 \leftarrow K - M - 1$ \; -$\deltaRPS_K \leftarrow \POC_M - \POC_K$ \; -$\numrefidcs_K \leftarrow \numrefpics_M + 1$ \; - -\For{$j \leftarrow 0$ \KwTo $\numrefpics_M$}{ - \For{$i \leftarrow 0$ \KwTo $\numrefidcs_K$}{ - \eIf{$\referencepictures_{M,j} + \deltaRPS_K == \referencepictures_{K,i}$}{ - \lIf{$\referencepictures_{K,i}$ is used by the current frame}{ - $\referenceidcs_{K,j} = 1$} \; - \lElse{$\referenceidcs_{K,j} = 2$} \; - }{ - $\referenceidcs_K[j] = 0$ \; - } - } -} - -\tcc{$\referencepictures_{M,\numrefpics_M}$ does not exist and is assumed to be 0} -\end{algorithm} - -Note: The above (automatic) generation of the inter RPS parameter -values has been integrated into the encoder, and is activated by -the value of predict $= 2$ followed by the value of deltaRIdx$-1$, -only, as described above. @@ -685,7 +603,7 @@ Specifies the output coded bit stream file. \Option{ReconFile (-o)} & %\ShortOption{-o} & \Default{\NotSet} & -Specifies the output locally reconstructed video file. +Specifies the output locally reconstructed video file. If more than one layer is encoded (i.e. MaxLayers > 1), a reconstructed file is written for each layer and the layer index is added as suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'reconst.yuv' becomes 'reconst0.yuv' for layer id 0, 'reconst' becomes 'reconst0'. 
\\ \Option{SourceWidth (-wdt)}% @@ -743,12 +661,10 @@ Note: This option has no effect on the decoding process. \Option{InputBitDepthC}% \Option{MSBExtendedBitDepthC}% -\Option{InternalBitDepthC}% \Option{OutputBitDepthC} & %\ShortOption{\None} & \Default{0}% \Default{0}% -\Default{0}% \Default{0} & Specifies the various bit-depths for chroma components. These only need to be specified if non-equal luma and chroma bit-depth processing is @@ -936,10 +852,67 @@ Enables harmonization of Gop first field couple. \Option{AccessUnitDelimiter} & %\ShortOption{\None} & -\Default{0} & +\Default{1} & Add Access Unit Delimiter NAL units between all Access Units. \\ +\Option{ScalingRatioHor} & +%\ShortOption{\None} & +\Default{1.0} & +Scaling ratio in horizontal direction for reference picture resampling. +\\ + +\Option{ScalingRatioVer} & +%\ShortOption{\None} & +\Default{1.0} & +Scaling ratio in vertical direction for reference picture resampling. +\\ + +\Option{FractionNumFrames} & +%\ShortOption{\None} & +\Default{1.0} & +Encode a fraction of the frames specified in FramesToBeEncoded. +\\ + +\Option{SwitchPocPeriod} & +%\ShortOption{\None} & +\Default{0} & +POC period at which resolution is changed. +\\ + +\Option{UpscaledOutput} & +%\ShortOption{\None} & +\Default{0} & +Picture output options: output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for reference picture resampling. +\\ + + +\end{OptionTableNoShorthand} + +%% +%% GOP based temporal filter parameters +%% + +\begin{OptionTableNoShorthand}{GOP based temporal filter parameters}{tab:gop-based-temporal-filter} + +\Option{TemporalFilter} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables GOP based temporal filter. +\\ +\Option{TemporalFilterFutureReference} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables referencing future frames in the GOP based temporal filter. Can be used to disable future referencing for +low delay configurations. 
+\\ +\Option{TemporalFilterStrengthFrame*} & +%\ShortOption{\None} & +\Default{} & +Strength for every * frame in GOP based temporal filter, where * is an integer. E.g. --TemporalFilterStrengthFrame8 0.95 will +enable GOP based temporal filter at every 8th frame with strength 0.95. Longer intervals override shorter ones when there are +multiple matches. +\\ \end{OptionTableNoShorthand} %% @@ -983,10 +956,22 @@ Valid values are: main, high. NB: There is currently only limited validation that the encoder configuration complies with the profile, level and tier constraints. \\ +\Option{SubProfile} & +%\ShortOption{\None} & +\Default{0} & +Indicates interoperability metadata registered as specified by Recommendation ITU-T T.35. +\\ + +\Option{EnableDecodingParameterSet} & +%\ShortOption{\None} & +\Default{false} & +Enables writing of a decoding parameter set. If disabled, no decoding parameter set will be written and the special reserved ID zero will be used in the SPS indicating no constraint. +\\ + \Option{MaxBitDepthConstraint} & %\ShortOption{\None} & \Default{0} & -For --profile=main-RExt, specifies the value to use to derive the general_max_bit_depth constraint flags for RExt profiles; when 0, use $\max(InternalBitDepth, InternalBitDepthC)$ +For --profile=main-RExt, specifies the value to use to derive the general_max_bit_depth constraint flags for RExt profiles; when 0, use InternalBitDepth. \\ \Option{MaxChromaFormatConstraint} & @@ -1040,6 +1025,20 @@ Specifies the value of general_frame_only_constraint_flag \end{OptionTableNoShorthand} +%% +%% Layer parameters +%% + +\begin{OptionTableNoShorthand}{Layer parameters}{tab:layer} +\Option{MaxLayers} & +%\ShortOption{\None} & +\Default{1} & +Specifies the value to use to derive the vps_max_layers_minus1 for layered coding. +\\ + +\end{OptionTableNoShorthand} + + %% %% Unit definition parameters %% @@ -1093,6 +1092,54 @@ Defines the depth of the TU tree for intra CUs. Defines the depth of the TU tree for inter CUs. 
\\ +\Option{MaxMTTHierarchyDepth} & +%\ShortOption{\None} & +\Default{3} & +Defines the maximum depth of the multi-type tree for inter slices. +\\ + +\Option{MaxMTTHierarchyDepthI} & +%\ShortOption{\None} & +\Default{3} & +Defines the maximum depth of the multi-type tree for intra slices. +\\ + +\Option{MaxMTTHierarchyDepthISliceC} & +%\ShortOption{\None} & +\Default{3} & +Defines the maximum depth of the multi-type tree in dual tree for chroma components. +\\ + +\Option{MaxMTTHierarchyDepthISliceL} & +%\ShortOption{\None} & +\Default{3} & +Defines the maximum depth of the multi-type tree in dual tree for luma component. +\\ + +\Option{MinQTChromaISlice} & +%\ShortOption{\None} & +\Default{4} & +Defines the minimum size of the quad tree in dual tree for chroma components. +\\ + +\Option{MinQTISlice} & +%\ShortOption{\None} & +\Default{8} & +Defines the minimum size of the quad tree for intra slices. +\\ + +\Option{MinQTLumaISlice} & +%\ShortOption{\None} & +\Default{8} & +Defines the minimum size of the quad tree in dual tree for luma component. +\\ + +\Option{MinQTNonISlice} & +%\ShortOption{\None} & +\Default{8} & +Defines the minimum size of the quad tree for inter slices. +\\ + \end{OptionTableNoShorthand} @@ -1122,6 +1169,13 @@ picture. \end{tabular} \\ +\Option{DRAPPeriod} & +%\ShortOption{\None} & +\Default{0} & +Specifies the DRAP period in frames. +Dependent RAP indication SEI messages are disabled if DRAPPeriod is 0. +\\ + \Option{GOPSize (-g)} & %\ShortOption{-g} & \Default{1} & @@ -1137,8 +1191,15 @@ elements. \par See section~\ref{sec:gop-structure} for further details. \\ + +\Option{ReWriteParamSets} & +%\ShortOption{-ip} & +\Default{$0$} & +Enable writing of parameter sets (SPS, PPS, etc.) before every (intra) random access point to enable true random access. +\\ \end{OptionTableNoShorthand} + %% %% Motion estimation parameters %% @@ -1222,12 +1283,41 @@ $} Specifies the maximum number of merge candidates to use. 
\\ +\Option{MaxNumTriangleCand} & +%\ShortOption{\None} & +\Default{5} & +Specifies the maximum number of triangle merge candidates to use. +\\ + +\Option{MaxNumIBCMergeCand} & +%\ShortOption{\None} & +\Default{6} & +Specifies the maximum number of IBC merge candidates to use. +\\ + \Option{DisableIntraInInter} & %\ShortOption{\None} & \Default{0} & Flag to disable intra PUs in inter slices. \\ +\Option{MMVD} & +%\ShortOption{\None} & +\Default{1} & +Enables or disables the merge mode with motion vector difference (MMVD). +\\ + +\Option{MmvdDisNum} & +%\ShortOption{\None} & +\Default{6} & +Specifies the number of MMVD distance entries used from the distance table at encoder. +\\ + +\Option{CIIP} & +%\ShortOption{\None} & +\Default{1} & +Enables or disables the merge mode with combined inter merge and intra prediction (CIIP). +\\ \end{OptionTableNoShorthand} @@ -1319,6 +1409,12 @@ candidate is not evaluated if the merge skip mode was the best merge mode for one of the previous candidates. \\ +\Option{SBTFast64WidthTh} & +%\ShortOption{\None} & +\Default{1920} & +Picture width threshold for testing size-64 SBT in RDO (now for HD and above sequences). +\\ + \Option{RDpenalty} & %\ShortOption{\None} & \Default{0} & @@ -1332,6 +1428,19 @@ Enabling this parameter can reduce the visibility of CU boundaries in the coded \end{tabular} \\ +\Option{FastLocalDualTreeMode} & +%\ShortOption{\None} & +\Default{0} & +Controls intra coding speedup introduced with local dual tree mode. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Disabled\\ + 1 & Stop testing intra modes in inter slices, if best cost is more than 1.5 times inter cost.\\ + 2 & Test only one intra mode in inter slices\\ +\end{tabular} +\\ + + \end{OptionTableNoShorthand} %% @@ -1350,6 +1459,12 @@ Specifies the base value of the quantization parameter. If it is non-integer, th Specifies a QP offset from the base QP value to be used for intra frames. 
\\ +\Option{DepQuant} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables the usage of dependent quantization. +\\ + \Option{LambdaFromQpEnable} & %\ShortOption{\None} & \Default{false} & @@ -1380,6 +1495,62 @@ $\lambda = \lambda_{base} \times max(2, min(4, (sliceQP-12)/6))$ In addition, independent on the IntraQPFactor, if HadamardME=false, then for an inter slice the final $\lambda$ is scaled by a factor of $0.95$. \\ +\Option{UseIdentityTableForNon420Chroma}& +\Default{1}& +Specifies whether identity chroma QP mapping tables are used for 4:2:2 and 4:4:4 content. When set to 1, the identity chroma QP mapping table is used for all the three chroma components for 4:2:2 or 4:4:4 content. When set to 0, chroma QP +mapping table may be specified by other parameters in the configuration. +\\ + +\Option{SameCQPTablesForAllChroma}& +\Default{1}& +Specifies that the Cb, Cr and joint Cb-Cr components all use the same +chroma mapping table. When set to 1, the values of QpInValCr, +QpOutValCr, QpInValCbCr and QpOutValCbCr are ignored. When set to 0, all +Cb, Cr and joint Cb-Cr components may have different chroma QP mapping tables specified in the configuration file. Note that +SameCQPTablesForAllChroma is ignored when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content. +\\ + +\Option{QpInValCb}% +\Option{QpOutValCb}& +\Default{\NotSet} & +Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the Cb component. Default values are as follows: +\par +\begin{tabular}{cp{0.45\textwidth}} + QpInValCb & 25, 33, 43 \\ + QpOutValCb & 25, 32, 37 \\ +\end{tabular} +The values specify the pivot points for the chroma QP mapping table; the unspecified QP values are interpolated from the remaining values. E.g., the default values above specify that the pivot points for the chroma QP mapping table for the Cb component are (25, 25), (33, 32), (43, 37). 
+Note that QpInValCb and QpOutValCb are ignored when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content. +\\ + +\Option{QpInValCr}% +\Option{QpOutValCr}& +\Default{\NotSet} & +Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the Cr component. Default values are as follows: +\par +\begin{tabular}{cp{0.45\textwidth}} + QpInValCr & 0 \\ + QpOutValCr & 0 \\ +\end{tabular} + +The default values specify a pivot point of (0,0) which corresponds to an identity chroma QP mapping table. Note that QpInValCr and QpOutValCr are ignored +when SameCQPTablesForAllChroma is set to 1 or when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content. +\\ + +\Option{QpInValCbCr}% +\Option{QpOutValCbCr}& +\Default{\NotSet} & +Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the joint Cb-Cr component. Default values are as follows: +\par +\begin{tabular}{cp{0.45\textwidth}} + QpInValCbCr & 0 \\ + QpOutValCbCr & 0 \\ +\end{tabular} + +The default values specify a pivot point of (0,0) which corresponds to an identity chroma QP mapping table. Note that QpInValCbCr and QpOutValCbCr are ignored +when SameCQPTablesForAllChroma is set to 1 or when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content. +\\ + \Option{CbQpOffset (-cbqpofs)}% \Option{CrQpOffset (-crqpofs)} & %\ShortOption{-cbqpofs}% @@ -1392,6 +1563,20 @@ and cr_qp_offset, that are transmitted in the PPS. Valid values are in the range $[-12, 12]$. \\ +\Option{CbCrQpOffset (-cbcrqpofs)} & +\Default{-1} & +Global offset to apply to the luma QP to derive the QP for joint Cb-Cr +residual coding mode. This option corresponds to the value of cb_cr_qp_offset +transmitted in the PPS. Valid values are in the range $[-12, 12]$. 
+\\ + +\Option{CbCrQpOffsetDualTree} & +\Default{0} & +Tile group QP offset for joint Cb-Cr residual coding mode when separate luma and +chroma trees are used. This option corresponds to the value of tile_group_cb_cr_qp_offset +transmitted in the tile group header. Valid values are in the range $[-12, 12]$. +\\ + \Option{LumaLevelToDeltaQPMode} & \Default{0} & Luma-level based Delta QP modulation. @@ -1515,10 +1700,16 @@ Specifies a file containing a list of QP deltas. The $n$-th line value delta for the picture with POC value $n$. \\ -\Option{AdaptiveQp (-aq)} & +\Option{PerceptQPA (-qpa)} & +%\ShortOption{-qpa} & +\Default{false} & +Enables or disables the perceptually optimized QP adaptation (QPA) method described in JVET-H0047, JVET-K0206, and JVET-M0091. Use this together with 'SliceChromaQPOffsetPeriodicity=1' and, in case of HDR input, 'LumaLevelToDeltaQPMode=1' for best subjective quality. Cannot be used together with 'SelectiveRDOQ' (see above) or 'AdaptiveQP' (see below). +\\ + +\Option{AdaptiveQP (-aq)} & %\ShortOption{-aq} & \Default{false} & -Enable or disable QP adaptation based upon a psycho-visual model. +Enables or disables the legacy QP adaptation method based upon a psycho-visual model. \\ \Option{MaxQPAdaptationRange (-aqr)} & @@ -1560,6 +1751,12 @@ If ScalingList is set to 2 and this parameter is an empty string, information on is output and the encoder stops. \\ +\Option{DisableScalingMatrixForLFNST} & +%\ShortOption{\None} & +\Default{true} & +Specifies whether scaling matrices are to be applied to blocks coded with LFNST. 
+\\ + \Option{MaxCUChromaQpAdjustmentDepth} & %\ShortOption{\None} & \Default{-1} & @@ -1570,125 +1767,110 @@ Specifies the maximum depth for CU chroma QP adjustment; if negative, CU chroma %% -%% Slice coding parameters +%% Slice/Tile coding parameters %% -\begin{OptionTableNoShorthand}{Slice coding parameters}{tab:slice-coding} -%\Option{SliceGranularity} & -%\ShortOption{\None} & -%\Default{0} & -%Determines the depth in an LCU at which slices may begin and end. -%\par -%\begin{tabular}{cp{0.45\textwidth}} -% 0 & Slice addresses are LCU aligned \\ -% $1 \leq n \leq 3$ -% & Slice start addresses are aligned to CUs at depth $n$ \\ -%\end{tabular} -% -%Note: The smallest permissible alignment is 16x16 CUs. -%Values of $n$ must satisfy this constraint, for example, with a 64x64 -%LCU, $n$ must be less than or equal to 2. -%\\ +\begin{OptionTableNoShorthand}{Slice and tile coding parameters}{tab:slice-coding} -\Option{SliceMode} & +\Option{EnablePicPartitioning} & %\ShortOption{\None} & \Default{0} & -Controls the slice partitioning method in conjunction with -SliceArgument. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Single slice \\ - 1 & Maximum number of CTUs per slice \\ - 2 & Maximum number of bytes per slice \\ - 3 & Maximum number of tiles per slice \\ -\end{tabular} +Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used). +\\ + +\Option{TileColumnWidthArray} & +%\ShortOption{\None} & +\Default{\NotSet} & +Tile column widths in units of CTUs. Last column width in list will be repeated uniformly to cover any remaining picture width. \\ -\Option{SliceArgument} & +\Option{TileRowHeightArray} & %\ShortOption{\None} & \Default{\NotSet} & -Specifies the maximum number of CTUs, bytes or tiles in a slice depending on the -SliceMode setting. +Tile row heights in units of CTUs. Last row height in list will be repeated uniformly to cover any remaining picture height. 
\\ -\Option{SliceSegmentMode} & +\Option{RasterScanSlices} & %\ShortOption{\None} & \Default{0} & -Enables (dependent) slice segment coding in conjunction with -SliceSegmentArgument. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Single slice \\ - 1 & Maximum number of CTUs per slice segment\\ - 2 & Maximum number of bytes per slice segment\\ - 3 & Maximum number of tiles per slice segment\\ -\end{tabular} +Use raster-scan or rectangular slices (0: rectangular, 1: raster-scan). \\ -\Option{SliceSegmentArgument} & +\Option{RectSlicePositions} & %\ShortOption{\None} & \Default{\NotSet} & -Defines the maximum number of CTUs, bytes or tiles a slice segment -depending on the SliceSegmentMode setting. +Rectangular slice positions. List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address. \\ -\Option{WaveFrontSynchro} & +\Option{RectSliceFixedWidth} & %\ShortOption{\None} & -\Default{false} & -Enables the use of specific CABAC probabilities synchronization at the -beginning of each line of CTBs in order to produce a bitstream that can -be encoded or decoded using one or more cores. +\Default{0} & +Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead). \\ -\Option{TileUniformSpacing} & +\Option{RectSliceFixedHeight} & %\ShortOption{\None} & -\Default{false} & -Controls the mode used to determine per row and column tile sizes. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Each tile column width and tile row height is explicitly set - by TileColumnWidthArray and TileRowHeightArray respectively \\ - 1 & Tile columns and tile rows are uniformly spaced. \\ -\end{tabular} +\Default{0} & +Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead). +\\ + +\Option{RasterSliceSizes} & +%\ShortOption{\None} & +\Default{\NotSet} & +Raster-scan slice sizes in units of tiles. 
Last size in list will be repeated uniformly to cover any remaining tiles in the picture. \\ -\Option{NumTileColumnsMinus1}% -\Option{NumTileRowsMinus1} & +\Option{DisableLoopFilterAcrossTiles} & %\ShortOption{\None} & \Default{0} & -Specifies the tile based picture partitioning geometry as -$\mathrm{NumTileColumnsMinus1} + 1 \times \mathrm{NumTileRowsMinus1} + 1$ -columns and rows. +Loop filtering applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries). \\ -\Option{TileColumnWidthArray}% -\Option{TileRowHeightArray} & +\Option{DisableLoopFilterAcrossSlices} & %\ShortOption{\None} & -\Default{\NotSet} & -Specifies a space or comma separated list of widths and heights, -respectively, of each tile column or tile row. The first value in the -list corresponds to the leftmost tile column or topmost tile row. +\Default{0} & +Loop filtering applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries). +\\ + +\Option{IDRRefParamList} & +%\ShortOption{\None} & +\Default{false} & +Enables the signalling of reference picture list syntax elements in slice headers of IDR pictures +\\ + +\Option{WaveFrontSynchro} & +%\ShortOption{\None} & +\Default{false} & +Enables the use of specific CABAC probabilities synchronization at the +beginning of each line of CTBs in order to produce a bitstream that can +be encoded or decoded using one or more cores. \\ + + \end{OptionTableNoShorthand} +%% +%% Slice/Sub-Picture coding parameters +%% +\begin{OptionTableNoShorthand}{Slice and Sub-Picture coding parameters}{tab:subpicture-coding} + +\Option{EnableSubPicPartitioning} & +%\ShortOption{\None} & +\Default{1} & +Enable Sub Picture partitioning (0: single slice per sub-picture, 1: multiple slices per sub-picture can be used). 
+\\ +\end{OptionTableNoShorthand} %% -%% Deblocking filter parameters +%% In-loop filtering parameters %% -\begin{OptionTableNoShorthand}{Deblocking filter parameters}{tab:deblocking-filter} +\begin{OptionTableNoShorthand}{In-loop filtering parameters}{tab:inloop-filter} \Option{LoopFilterDisable} & %\ShortOption{\None} & \Default{false} & Enables or disables the in-loop deblocking filter. \\ -\Option{LFCrossSliceBoundaryFlag} & -%\ShortOption{\None} & -\Default{true} & -Enables or disables the use of in-loop filtering across slice -boundaries. -\\ - \Option{LoopFilterOffsetInPPS}& %\ShortOption{\None}& \Default{false}& @@ -1723,21 +1905,64 @@ Specifies the use of a deblocking filter metric to evaluate the suitability of d LoopFilterOffsetInPPS and LoopFilterDisable must be 0. Currently excepted values are 0, 1 and 2. \\ -\Option{LFCrossSliceBoundaryFlag}& +\Option{LoopFilterAcrossVirtualBoundariesDisabledFlag}& %\ShortOption{\None}& -\Default{true}& -Enables or disables the use of a deblocking across tile boundaries. +\Default{false}& +Disables in-loop filtering operations across the virtual boundaries. \\ -\end{OptionTableNoShorthand} +\Option{NumVerVirtualBoundaries}& +%\ShortOption{\None}& +\Default{0}& +Specifies the number of vertical virtual boundaries. The value of NumVerVirtualBoundaries shall be in the range of 0 to 3, inclusive. +\\ +\Option{NumHorVirtualBoundaries}& +%\ShortOption{\None}& +\Default{0}& +Specifies the number of horizontal virtual boundaries. The value of NumHorVirtualBoundaries shall be in the range of 0 to 3, inclusive. 
+\\ +\Option{VirtualBoundariesPosX}& +%\ShortOption{\None}& +\Default{\NotSet}& +Specifies the locations of the vertical virtual boundaries in units of luma samples +\\ -%% -%% Coding tools parameters -%% +\Option{VirtualBoundariesPosY}& +%\ShortOption{\None}& +\Default{\NotSet}& +Specifies the locations of the horizontal virtual boundaries in units of luma samples +\\ -\begin{OptionTableNoShorthand}{Coding tools parameters}{tab:coding-tools} +\Option{EncDbOpt}& +%\ShortOption{\None}& +\Default{false}& +Enables or disables encoder-side deblocking optimization. When it is enabled, deblocking filter is applied during mode decision. +\\ + + +\end{OptionTableNoShorthand} + + + +%% +%% Coding tools parameters +%% + +\begin{OptionTableNoShorthand}{Coding tools parameters}{tab:coding-tools} + +\Option{MRL} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the use of multiple reference line intra prediction (MRL). +\\ + +\Option{MIP} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables the use of matrix-based intra prediction (MIP). +\\ \Option{AMP} & %\ShortOption{\None} & @@ -1745,6 +1970,24 @@ Enables or disables the use of a deblocking across tile boundaries. Enables or disables the use of asymmetric motion partitions. \\ +\Option{ISP} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the Intra Sub-Partitions coding mode. +\\ + +\Option{ISPFast} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables fast encoder methods for ISP. +\\ + +\Option{JointCbCr} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the joint coding of chroma residuals. +\\ + \Option{SAO} & %\ShortOption{\None} & \Default{true} & @@ -1782,14 +2025,6 @@ for LCU bottom and right boundary areas. When true, resets the encoder's SAO state after an IRAP (POC order). \\ -\Option{ConstrainedIntraPred} & -%\ShortOption{\None} & -\Default{false} & -Enables or disables constrained intra prediction. 
Constrained intra -prediction only permits samples from intra blocks in the same slice as the -current block to be used for intra prediction. -\\ - \Option{FastUDIUseMPMEnabled} & %\ShortOption{\None} & \Default{true} & @@ -1808,32 +2043,6 @@ If enabled use a fast ME for generalised B Low Delay slices Enables use of B-Lambda for non-key low-delay pictures \\ -\Option{TransquantBypassEnable} & -%\ShortOption{\None} & -\Default{false} & -Enables or disables the ability to bypass the transform, -quantization and filtering stages at CU level. -This option corresponds to the value of -transquant_bypass_enabled_flag that is transmitted in the PPS. - -See CUTransquantBypassFlagForce for further details. -\\ - -\Option{CUTransquantBypassFlagForce} & -%\ShortOption{\None} & -\Default{0} & -Controls the per CU transformation, quantization and filtering -mode decision. -This option controls the value of the per CU cu_transquant_bypass_flag. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Bypass is searched on a CU-by-CU basis and will be used if the cost is lower than not bypassing. \\ - 1 & Bypass is forced for all CUs. \\ -\end{tabular} - -This option has no effect if TransquantBypassEnable is disabled. -\\ - \Option{PCMEnabledFlag} & %\ShortOption{\None} & \Default{false} & @@ -1887,7 +2096,7 @@ Enables the use of weighted prediction in P slices. Enables the use of weighted prediction in B slices. \\ -\Option{WPMethod (-wpM)} & +\Option{WeightedPredMethod (-wpM)} & %\ShortOption{\-wpM} & \Default{0} & Sets the Weighted Prediction method to be used. @@ -1902,12 +2111,6 @@ Sets the Weighted Prediction method to be used. \\ -\Option{Log2ParallelMergeLevel} & -%\ShortOption{\None} & -\Default{2} & -Defines the PPS-derived Log2ParMrgLevel variable. -\\ - \Option{SignHideFlag (-SBH)} & %\ShortOption{-SBH} & \Default{true} & @@ -1919,16 +2122,6 @@ bitstream, but may be inferred from the parity of the sum of all nonzero coefficients in the current coefficient group. 
\\ -\Option{StrongIntraSmoothing (-sis)} & -%\ShortOption{-sis} & -\Default{true} & -If enabled specifies that for 32x32 intra prediction block, the intra smoothing -when applied is either the 1:2:1 smoothing filter or a stronger bi-linear -interpolation filter. Key reference sample values are tested and if the criteria -is satisfied, the stronger intra smoothing filter is applied. -If disabled, the intra smoothing filter when applied is the 1:2:1 smoothing filter. -\\ - \Option{TMVPMode} & %\ShortOption{\None} & \Default{1} & @@ -1941,6 +2134,52 @@ Controls the temporal motion vector prediction mode. \end{tabular} \\ +\Option{PPSorSliceMode} & +%\ShortOption{\None} & +\Default{0} & +Enables signaling the below parameters either in PPS or for each slice according to the following preset modes: +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Always signaled per slice. \\ + 1 & RA settings. \\ + 2 & LDB settings. \\ + 3 & LDP settings. \\ +\end{tabular} +\par +\begin{tabular}{p{5cm}llll} + & & & & \\ + Parameter & \multicolumn{3}{l}{Mode} \\ + & 0 & 1 & 2 & 3 \\ + dep_quant_enabled_flag & s & p & p & p \\ + ref_pic_list_sps_flag0 & s & s & p & p \\ + ref_pic_list_sps_flag1 & s & s & p & p \\ + temporal_mvp_enabled_flag & s & s & p & p \\ + mvd_l1_zero_flag & s & s & p & s \\ + collocated_from_l0_flag & s & s & p & s \\ + six_minus_max_num_merge_cand & s & p & p & p \\ + five_minus_max_num_subblock_merge_cand & s & p & p & p \\ + max_num_merge_cand_minus_max_num_triangle_cand & s & p & p & s \\ +\end{tabular} +\\ + +\Option{SliceLevelRpl} & +%\ShortOption{\None} & +\Default{true} & +Code reference picture lists in slice headers rather than picture header. +\\ + +\Option{SliceLevelDblk} & +%\ShortOption{\None} & +\Default{true} & +Code deblocking filter parameters in slice headers rather than picture header. +\\ + +\Option{SliceLevelSao} & +%\ShortOption{\None} & +\Default{true} & +Code SAO parameters in slice headers rather than picture header. 
+\\ + \Option{TransformSkip} & %\ShortOption{\None} & \Default{false} & @@ -1957,6 +2196,157 @@ luma TUs are also skipped. \par This option has no effect if TransformSkip is disabled. \\ + +\Option{ChromaTS} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables reduced testing of the transform-skipping mode +decision for chroma TUs. When disabled, no RDO search is performed for +chroma TUs. +\par +This option has no effect if TransformSkip is disabled. +\\ + +\Option{ALF} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables adaptive loop filter. +\\ + +\Option{UseNonLinearAlfLuma} & +%\ShortOption{\None} & +\Default{true} & +Enables optimization of non-linear filters for ALF on Luma channel. +\\ + +\Option{UseNonLinearAlfChroma} & +%\ShortOption{\None} & +\Default{true} & +Enables optimization of non-linear filters for ALF on Chroma channels. +\\ + +\Option{MaxNumAlfAlternativesChroma} & +%\ShortOption{\None} & +\Default{8} & +Specifies the maximum number of alternative chroma filters that can be +switched at CTB level. Set to 1 to disable alternative chroma filters. +Value shall be in the range 1..8. +\\ + +\Option{SMVD} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables symmetric MVD mode. +\\ + +\Option{PLT} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables palette mode coding. +\\ + +\Option{BDPCM} & +%\ShortOption{\None} & +\Default{0} & +Enables or disables the use of intra block differential pulse code modulation mode. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Disable BDPCM for luma and chroma.\\ + 1 & Enable BDPCM for luma.\\ + 2 & Enable BDPCM for luma and chroma. BDPCM for chroma is available for 444.\\ +\end{tabular} +\\ + +\Option{LFNST} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the use of low frequency non-separable transform (LFNST). 
+\\ + +\Option{FastLFNST} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the fast encoding of low frequency non-separable transform (LFNST). +\\ + +\Option{LMCSEnable} & +%\ShortOption{\None} & +\Default{true} & +Enables or disables the use of LMCS (luma mapping with chroma scaling). +\\ + +\Option{LMCSSignalType} & +%\ShortOption{\None} & +\Default{0} & +LMCS signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG. +\\ + +\Option{LMCSUpdateCtrl} & +%\ShortOption{\None} & +\Default{0} & +LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Random access: derive a new LMCS model at each IRAP.\\ + 1 & All intra: derive a new LMCS model at each intra slice.\\ + 2 & Low delay: derive a new LMCS model every second. \\ +\end{tabular} +\\ + +\Option{LMCSAdpOption} & +%\ShortOption{\None} & +\Default{0} & +Adaptive LMCS mapping derivation options: Options 1 to 4 are for experimental testing purposes and need to set parameter LMCSInitialCW. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Automatic adaptive algorithm (default).\\ + 1 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS for all slices. Uses a static LMCS mapping for low QP ($QP<=22$). \\ + 2 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS only for slices in lowest temporal layer. \\ + 3 & In addition to 1, disables LMCS for intra slices. \\ + 4 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS only for inter slices. \\ +\end{tabular} +\\ + +\Option{LMCSInitialCW} & +%\ShortOption{\None} & +\Default{0} & +LMCS initial total codeword (valid values [$0 - 1023$]) to be used in LMCS mapping derivation when LMCSAdpOption is not equal to 0. +\\ + +\Option{LMCSOffset} & +%\ShortOption{\None} & +\Default{0} & +Specifies the LMCS chroma residual scaling offset. This parameter corresponds to the value of lmcsDeltaCrs, derived from lmcs_delta_sign_crs_flag and lmcs_delta_abs_crs, that are transmitted in the APS. 
Valid values are in the range [-7;7]. +\\ + +\Option{ColorTransform} & +%\ShortOption{\None} & +\Default{false} & +Enables or disables the use of adaptive color transform (ACT). +\\ + +\Option{HorCollocatedChroma} & +%\ShortOption{\None} & +\Default{true} & +Specifies the location of a chroma sample relative to the luma sample in the horizontal direction in the reference picture resampling. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & horizontally shifted by 0.5 units of luma samples.\\ + 1 & collocated (default). \\ +\end{tabular} +\\ + +\Option{VerCollocatedChroma} & +%\ShortOption{\None} & +\Default{false} & +Specifies the location of a chroma sample relative to the luma sample in the vertical direction in the cross-component linear model intra prediction and the reference picture resampling. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & vertically shifted by 0.5 units of luma samples (default).\\ + 1 & collocated. \\ +\end{tabular} +\\ + \end{OptionTableNoShorthand} %% @@ -2125,32 +2515,15 @@ Indicates whether cropped decoded pictures are suitable for display using oversc 1 & Indicates that the decoded pictures may be displayed using overscan. \\ \end{tabular} \\ -\Option{VideoSignalTypePresent} & -\Default{false} & -Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present. -\\ -\Option{VideoFormat} & -\Default{5} & -Indicates representation of pictures. -\\ -\Option{VideoFullRange} & -\Default{false} & -Indicates the black level and range of luma and chroma signals. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Indicates that the luma and chroma signals are to be scaled prior to display. \\ - 1 & Indicates that the luma and chroma signals are not to be scaled prior to display. \\ -\end{tabular} -\\ \Option{ColourDescriptionPresent} & \Default{false} & -Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present. 
+Signals whether colour_primaries, transfer_characteristics, matrix_coefficients and video_full_range_flag are present. \\ \Option{ColourPrimaries} & \Default{2} & Indicates chromaticity coordinates of the source primaries. \\ -\Option{TransferCharateristics} & +\Option{TransferCharacteristics} & \Default{2} & Indicates the opto-electronic transfer characteristics of the source. \\ @@ -2158,6 +2531,15 @@ Indicates the opto-electronic transfer characteristics of the source. \Default{2} & Describes the matrix coefficients used in deriving luma and chroma from RGB primaries. \\ +\Option{VideoFullRange} & +\Default{false} & +Indicates the black level and range of luma and chroma signals. +\par +\begin{tabular}{cp{0.45\textwidth}} + 0 & Indicates that the luma and chroma signals are to be scaled prior to display. \\ + 1 & Indicates that the luma and chroma signals are not to be scaled prior to display. \\ +\end{tabular} +\\ \Option{ChromaLocInfoPresent} & \Default{false} & Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present. @@ -2170,74 +2552,6 @@ Specifies the location of chroma samples for top field. \Default{0} & Specifies the location of chroma samples for bottom field. \\ -\Option{NeutralChromaIndication} & -\Default{false} & -Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1). -\\ - -\Option{DefaultDisplayWindowFlag} & -\Default{flag} & -Indicates the presence of the Default Window parameters. -\par -\begin{tabular}{cp{0.45\textwidth}} -false & Disabled \\ -true & Enabled \\ -\end{tabular} -\\ - -\Option{DefDispWinLeftOffset}% -\Option{DefDispWinRightOffset}% -\Option{DefDispWinTopOffset}% -\Option{DefDispWinBottomOffset} & -\Default{0} & -Specifies the horizontal and vertical offset to be applied to the -input video from the conformance window in luma samples. -Must be a multiple of the chroma resolution (e.g. a multiple of two for 4:2:0). 
-\\ - -\Option{FrameFieldInfoPresentFlag} & -\Default{false} & -Specificies the value of the VUI syntax element `frame_field_info_present_flag', which indicates that pic_struct and field coding related values are present in picture timing SEI messages. -\\ - -\Option{PocProportionalToTimingFlag} & -\Default{false} & -Specificies the value of the VUI syntax element `vui_poc_proportional_to_timing_flag', which indicates that the POC value is proportional to the output time with respect to the first picture in the CVS. -\\ - -\Option{NumTicksPocDiffOneMinus} & -\Default{0} & -Specificies the value of the VUI syntax element `vui_num_ticks_poc_diff_one_minus1', which specifies the number of clock ticks corresponding to a difference of picture order count values equal to 1, and is used only when PocProportionalToTimingFlag is true. -\\ - -\Option{BitstreamRestriction} & -\Default{false} & -Signals whether bitstream restriction parameters are present. -\\ -\Option{TilesFixedStructure} & -\Default{false} & -Indicates that each active picture parameter set has the same values of the syntax elements related to tiles. -\\ -\Option{MotionVectorsOverPicBoundaries} & -\Default{false} & -Indicates that no samples outside the picture boundaries are used for inter prediction. -\\ -\Option{MaxBytesPerPicDenom} & -\Default{2} & -Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture. -\\ -\Option{MaxBitsPerMinCuDenom} & -\Default{1} & -Indicates an upper bound for the number of bits of coding_unit() data. -\\ -\Option{Log2MaxMvLengthHorizontal} & -\Default{15} & -Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units. -\\ -\Option{Log2MaxMvLengthVertical} & -\Default{15} & -Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units. -\\ \end{OptionTableNoShorthand} @@ -2249,9 +2563,9 @@ Specifies the cost mode to use. 
\par \begin{tabular}{lp{0.3\textwidth}} lossy & $cost=distortion+\lambda \times bits$ \\ - sequence_level_lossless & $cost=distortion / \lambda + bits$. \\ - lossless & As with sequence_level_lossless, but QP is also set to 0 (this will be deprecated in the future) \\ - mixed_lossless_lossy & As with sequence_level_lossless, but QP'=4 is used for pre-estimates of transquant-bypass blocks \\ +% sequence_level_lossless & $cost=distortion / \lambda + bits$. \\ + lossless & $cost = bits$, QP'=0 is used for all transform blocks and the only allowed encoder result is either an empty transform block or a transform-skipped block. \\ +% mixed_lossless_lossy & As with sequence_level_lossless, but QP'=4 is used for pre-estimates of transquant-bypass blocks \\ \end{tabular} \\ @@ -2288,6 +2602,7 @@ Specifies the shift to apply to the SAO parameters. If negative, an estimate wil Specifies the maximum TU size for which transform-skip can be used; the minimum value is 2. Version 1 and some Version 2 (RExt) profiles require this to be 2. \\ + \Option{ImplicitResidualDPCM} & \Default{false} & When true, specifies the use of the implicitly signalled residual RDPCM tool (for intra). Version 1 and some Version 2 (RExt) profiles require this to be false. 
@@ -2341,7 +2656,7 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension 15 & Picture snapshot & (Not handled)\\ 16 & Progressive refinement segment start & (Not handled)\\ 17 & Progressive refinement segment end & (Not handled)\\ - 19 & Film grain characteristics & (Not handled)\\ + 19 & Film grain characteristics & Table \ref{tab:sei-film-grain} \\ 22 & Post-filter hint & (Not handled)\\ 23 & Tone mapping information & Table \ref{tab:sei-tone-mapping-info} \\ 45 & Frame packing arrangement & Table \ref{tab:sei-frame-packing-arrangement} \\ @@ -2363,6 +2678,18 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension 141 & Knee function information & Table \ref{tab:sei-knee-function} \\ 142 & Colour remapping information & Table \ref{tab:sei-colour-remapping}\\ 143 & Deinterlaced field identification & (Not handled)\\ + 144 & Content light level info & Table \ref{tab:sei-content-light-level}\\ + 147 & Alternative transfer characteristics & Table \ref{tab:sei-alternative-transfer-characteristics}\\ + 148 & Ambient viewing environment & Table \ref{tab:sei-ambient-viewing-environment}\\ + 149 & Content colour volume & Table \ref{tab:sei-content-colour-volume}\\ + 150 & Equirectangular projection & Table \ref{tab:sei-erp} \\ + 153 & Generalized cubemap projection & Table \ref{tab:sei-gcmp} \\ + 154 & Sphere rotation & Table \ref{tab:sei-sphere-rotation} \\ + 155 & Region-wise packing & Table \ref{tab:sei-rwp} \\ + 156 & Omni viewport & Table \ref{tab:sei-omni-viewport} \\ + 168 & Frame-field information & Table \ref{tab:sei-frame-field} \\ + 203 & Subpicture Level Information & Table \ref{tab:sei-subpic-level} \\ + 204 & Sample Aspect Ratio Information & Table \ref{tab:sei-sari} \\ \end{SEIListTable} %% %% SEI messages @@ -2401,6 +2728,59 @@ SEI messages. 
+\begin{OptionTableNoShorthand}{Film grain characteristics SEI message encoder parameters}{tab:sei-film-grain} +\Option{SEIFGCEnabled} & +\Default{0} & +Enables or disables the insertion of the film grain characteristics SEI message. +\\ +\Option{SEIFGCCancelFlag} & +\Default{0} & +Specifies the persistence of any previous film grain characteristics SEI message in output order. +\\ +\Option{SEIFGCPersistenceFlag} & +\Default{1} & +Specifies the persistence of the film grain characteristics SEI message for the current layer. +\\ +\Option{SEIFGCModelID} & +\Default{0} & +Specifies the film grain simulation model. +\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & frequency filtering \\ + 1 & auto-regression \\ +\end{tabular} +\\ +\Option{SEIFGCSepColourDescPresentFlag} & +\Default{0} & +Specifies the presence of a distinct colour space description for the film grain characteristics specified in the SEI message. +\\ +\Option{SEIFGCBlendingModeID} & +\Default{0} & +Specifies the blending mode used to blend the simulated film grain with the decoded images. +\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & additive \\ + 1 & multiplicative \\ +\end{tabular} +\\ +\Option{SEIFGCLog2ScaleFactor} & +\Default{0} & +Specifies a scale factor used in the film grain characterization equations. +\\ +\Option{SEIFGCCompModelPresentComp0} & +\Default{0} & +Specifies the presence of film grain modelling on colour component 0. +\\ +\Option{SEIFGCCompModelPresentComp1} & +\Default{0} & +Specifies the presence of film grain modelling on colour component 1. +\\ +\Option{SEIFGCCompModelPresentComp2} & +\Default{0} & +Specifies the presence of film grain modelling on colour component 2. 
+\\ +\end{OptionTableNoShorthand} + \begin{OptionTableNoShorthand}{Tone mapping information SEI message encoder parameters}{tab:sei-tone-mapping-info} \Option{SEIToneMappingInfo} & \Default{0} & @@ -2582,7 +2962,7 @@ Specifies luma sample value of the extended dynamic range assigned decoded pictu Enables or disables the insertion of the Frame packing arrangement SEI messages. \\ \Option{SEIFramePackingType} & -\Default{0} & +\Default{3} & Indicates the arrangement type in the Frame packing arrangement SEI message. This option has no effect if SEIFramePacking is disabled. \par @@ -2604,7 +2984,7 @@ This option has no effect if SEIFramePacking is disabled. \end{tabular} \\ \Option{SEIFramePackingQuincunx} & -\Default{0} & +\Default{1} & Enables or disables the quincunx_sampling signalling in the Frame packing arrangement SEI messages. This option has no effect if SEIFramePacking is disabled. @@ -2967,6 +3347,463 @@ An example file can be found in cfg/misc/example_colour_remapping_sei_encoder_0. \\ \end{OptionTableNoShorthand} +\begin{OptionTableNoShorthand}{Equirectangular Projection SEI message encoder parameters}{tab:sei-erp} +\Option{SEIErpEnabled} & +\Default{false} & +Enables (true) or disables (false) the insertion of equirectangular projection SEI message. +\\ +\Option{SEIErpCancelFlag} & +\Default{true} & +Indicates that equirectangular projection SEI message cancels the persistence (true) or follows (false). +\\ +\Option{SEIErpPersistenceFlag} & +\Default{false} & +Specifies the persistence of the equirectangular projection SEI message. +\\ +\Option{SEIErpGuardBandFlag} & +\Default{false} & +Indicates the existence of guard band areas in the constituent picture. +\\ +\Option{SEIErpGuardBandType} & +\Default{0} & +Indicates the type of the guard bands. +\\ +\Option{SEIErpLeftGuardBandWidth} & +\Default{0} & +Indicates the width of the guard band on the left side of the constituent picture. 
+\\ +\Option{SEIErpRightGuardBandWidth} & +\Default{0} & +Indicates the width of the guard band on the right side of the constituent picture. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Generalized Cubemap Projection SEI message encoder parameters}{tab:sei-gcmp} +\Option{SEIGcmpEnabled} & +\Default{false} & +Enables (true) or disables (false) the insertion of generalized cubemap projection SEI message. +\\ +\Option{SEIGcmpCancelFlag} & +\Default{true} & +Indicates that generalized cubemap projection SEI message cancels the persistence (true) or follows (false). +\\ +\Option{SEIGcmpPersistenceFlag} & +\Default{false} & +Specifies the persistence of the generalized cubemap projection SEI message. +\\ +\Option{SEIGcmpPackingType} & +\Default{0} & +Specifies the packing type. +\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & 6 rows and 1 column \\ + 1 & 3 rows and 2 columns \\ + 2 & 2 rows and 3 columns \\ + 3 & 1 row and 6 columns \\ + 4 & 1 row and 5 columns (hemisphere cubemap) \\ + 5 & 5 rows and 1 column (hemisphere cubemap) \\ +\end{tabular} +\\ +\Option{SEIGcmpMappingFunctionType} & +\Default{0} & +Specifies the mapping function used to adjust the sample locations. +\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & Disabled (conventional cubemap projection) \\ + 1 & Equi-angular mapping function \\ + 2 & Defined by SEIGcmpFunctionCoeffU, SEIGcmpFunctionUAffectedByVFlag, SEIGcmpFunctionCoeffV, and SEIGcmpFunctionVAffectedByUFlag \\ +\end{tabular} +\\ +\Option{SEIGcmpFaceIndex} & +\Default{} & +An array that specifies the face index for the faces packed in the cubemap projected picture. +\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & Front face \\ + 1 & Back face \\ + 2 & Top face \\ + 3 & Bottom face \\ + 4 & Right face \\ + 5 & Left face \\ +\end{tabular} +\\ +\Option{SEIGcmpFaceRotation} & +\Default{} & +An array that specifies the rotation to be applied to the faces. 
+\par +\begin{tabular}{cp{0.35\textwidth}} + 0 & No rotation \\ + 1 & 90 degree anticlockwise \\ + 2 & 180 degree anticlockwise \\ + 3 & 270 degree anticlockwise \\ +\end{tabular} +\\ +\Option{SEIGcmpFunctionCoeffU} & +\Default{} & +An array that specifies the coefficients used in the cubemap mapping function of the u-axis for the faces when SEIGcmpMappingFunctionType is set to 2. +\\ +\Option{SEIGcmpFunctionUAffectedByVFlag} & +\Default{} & +An array that specifies whether the cubemap mapping function of the u-axis refers to the v position of the sample location for the faces when SEIGcmpMappingFunctionType is set to 2. +\\ +\Option{SEIGcmpFunctionCoeffV} & +\Default{} & +An array that specifies the coefficients used in the cubemap mapping function of the v-axis for the faces when SEIGcmpMappingFunctionType is set to 2. +\\ +\Option{SEIGcmpFunctionVAffectedByUFlag} & +\Default{} & +An array that specifies whether the cubemap mapping function of the v-axis refers to the u position of the sample location for the faces when SEIGcmpMappingFunctionType is set to 2. +\\ +\Option{SEIGcmpGuardBandFlag} & +\Default{false} & +Indicates the existence of guard band areas in the picture. +\\ +\Option{SEIGcmpGuardBandBoundaryType} & +\Default{false} & +Enables (true) or disables (false) the boundary guard bands. +\\ +\Option{SEIGcmpGuardBandSamplesMinus1} & +\Default{0} & +Specifies the number of guard band samples minus 1 used in the cubemap projected picture. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Sphere Rotation SEI message encoder parameters}{tab:sei-sphere-rotation} +\Option{SEISphereRotationEnabled} & +\Default{false} & +Enables (true) or disables (false) the insertion of sphere rotation SEI message. +\\ +\Option{SEISphereRotationCancelFlag} & +\Default{true} & +Indicates that the sphere rotation SEI message cancels the persistence (true) or follows (false). 
+\\ +\Option{SEISphereRotationPersistenceFlag} & +\Default{false} & +Specifies the persistence of the sphere rotation SEI message. +\\ +\Option{SEISphereRotationYaw} & +\Default{0} & +Specifies the value of the yaw rotation angle. +\\ +\Option{SEISphereRotationPitch} & +\Default{0} & +Specifies the value of the pitch rotation angle. +\\ +\Option{SEISphereRotationRoll} & +\Default{0} & +Specifies the value of the roll rotation angle. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Region-wise packing SEI message encoder parameters}{tab:sei-rwp} +\Option{SEIRwpEnabled} & +\Default{false} & +Enables (true) or disables (false) the insertion of region-wise packing SEI message. +\\ +\Option{SEIRwpCancelFlag} & +\Default{true} & +Indicates that RWP SEI message cancels the persistence (true) or follows (false). +\\ +\Option{SEIRwpPersistenceFlag} & +\Default{false} & +Specifies the persistence of the RWP SEI message. +\\ +\Option{SEIRwpConstituentPictureMatchingFlag} & +\Default{false} & +Specifies the RWP SEI message applies individually to each constituent picture (true) or to the projected picture (false). +\\ +\Option{SEIRwpNumPackedRegions} & +\Default{0} & +Specifies the number of packed regions when constituent picture matching flag is equal to 0. +\\ +\Option{SEIRwpProjPictureWidth} & +\Default{0} & +Specifies the width of the projected picture. +\\ +\Option{SEIRwpProjPictureHeight} & +\Default{0} & +Specifies the height of the projected picture. +\\ +\Option{SEIRwpPackedPictureWidth} & +\Default{0} & +Specifies the width of the packed picture. +\\ +\Option{SEIRwpPackedPictureHeight} & +\Default{0} & +Specifies the height of the packed picture. +\\ +\Option{SEIRwpTransformType} & +\Default{} & +An array that specifies the rotation and mirroring to be applied to the packed regions. +\\ +\Option{SEIRwpGuardBandFlag} & +\Default{} & +An array that specifies the existence of guard band in the packed regions. 
+\\ +\Option{SEIRwpProjRegionWidth} & +\Default{} & +An array that specifies the width of the projected regions. +\\ +\Option{SEIRwpProjRegionHeight} & +\Default{} & +An array that specifies the height of the projected regions. +\\ +\Option{SEIRwpGuardBandFlag} & +\Default{} & +An array that specifies the existence of guard band in the packed regions. +\\ +\Option{SEIRwpProjRegionTop} & +\Default{} & +An array that specifies the top sample row of the projected regions. +\\ +\Option{SEIRwpProjRegionLeft} & +\Default{} & +An array that specifies the left-most sample column of the projected regions. +\\ +\Option{SEIRwpPackedRegionWidth} & +\Default{} & +An array that specifies the width of the packed regions. +\\ +\Option{SEIRwpPackedRegionHeight} & +\Default{} & +An array that specifies the height of the packed regions. +\\ +\Option{SEIRwpPackedRegionTop} & +\Default{} & +An array that specifies the top luma sample row of the packed regions. +\\ +\Option{SEIRwpPackedRegionLeft} & +\Default{} & +An array that specifies the left-most luma sample column of the packed regions. +\\ +\Option{SEIRwpLeftGuardBandWidth} & +\Default{} & +An array that specifies the width of the guard band on the left side of the packed regions. +\\ +\Option{SEIRwpRightGuardBandWidth} & +\Default{} & +An array that specifies the width of the guard band on the right side of the packed regions. +\\ +\Option{SEIRwpTopGuardBandHeight} & +\Default{} & +An array that specifies the height of the guard band above the packed regions. +\\ +\Option{SEIRwpBottomGuardBandHeight} & +\Default{} & +An array that specifies the height of the guard band below the packed regions. +\\ +\Option{SEIRwpGuardBandNotUsedForPredFlag} & +\Default{} & +An array that specifies whether the guard bands are used in the inter prediction process. +\\ +\Option{SEIRwpGuardBandType} & +\Default{} & +An array that specifies the type of the guard bands for the packed regions. 
+\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Omni Viewport SEI message encoder parameters}{tab:sei-omni-viewport} +\Option{SEIOmniViewportEnabled} & +\Default{false} & +Enables (true) or disables (false) the insertion of omni viewport SEI message. +\\ +\Option{SEIOmniViewportId} & +\Default{0} & +Contains an identifying number that may be used to identify the purpose of the one or more recommended viewport regions. +\\ +\Option{SEIOmniViewportCancelFlag} & +\Default{true} & +Indicates that the omni viewport SEI message cancels the persistence (true) or follows (false). +\\ +\Option{SEIOmniViewportPersistenceFlag} & +\Default{false} & +Specifies the persistence of the omni viewport SEI message. +\\ +\Option{SEIOmniViewportCntMinus1} & +\Default{0} & +Specifies the number of recommended viewport regions minus 1. +\\ +\Option{SEIOmniViewportAzimuthCentre} & +\Default{} & +An array that indicates the azimuth of the centre of the i-th recommended viewport region. +\\ +\Option{SEIOmniViewportElevationCentre} & +\Default{} & +An array that indicates the elevation of the centre of the i-th recommended viewport region. +\\ +\Option{SEIOmniViewportTiltCentre} & +\Default{} & +An array that indicates the tilt angle of the i-th recommended viewport region. +\\ +\Option{SEIOmniViewportHorRange} & +\Default{} & +An array that indicates the azimuth range of the i-th recommended viewport region. +\\ +\Option{SEIOmniViewportVerRange} & +\Default{} & +An array that indicates the elevation range of the i-th recommended viewport region. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Sample Aspect Ratio Information SEI message encoder parameters}{tab:sei-sari} +\Option{SEISampleAspectRatioInfo} & +\Default{false} & +Enables (true) or disables (false) the insertion of Sample Aspect Ratio Information SEI message. 
+\\ +\Option{SEISARICancelFlag} & +\Default{true} & +Indicates that the Sample Aspect Ratio Information SEI message cancels the persistence (true) or follows (false). +\\ +\Option{SEISARIPersistenceFlag} & +\Default{false} & +Specifies the persistence of the Sample Aspect Ratio Information SEI message. +\\ +\Option{SEISARIAspectRatioIdc} & +\Default{0} & +Specifies aspect ratio IDC as defined in the standard. +\\ +\Option{SEISARISarWidth} & +\Default{0} & +Specifies the horizontal size of the sample aspect ratio, if SEISARIAspectRatioIdc is equal to 255. +\\ +\Option{SEISARISarHeight} & +\Default{0} & +Specifies the vertical size of the sample aspect ratio, if SEISARIAspectRatioIdc is equal to 255. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Frame-Field Information SEI message encoder parameters}{tab:sei-frame-field} +\Option{SEIFrameFieldInfo} & +\Default{false} & +Enables (true) or disables (false) the insertion of Frame-Field Information SEI message. +\\ +\end{OptionTableNoShorthand} + +\begin{OptionTableNoShorthand}{Subpicture Level Information SEI message encoder parameters}{tab:sei-subpic-level} +\Option{SEISubpictureLevelInfo} & +\Default{false} & +Enables (true) or disables (false) the insertion of Subpicture Level Information SEI message. +Note, currently no other configuration options are available, because this depends on the number of subpictures, +which are still not supported in the software. An example SEI with dummy values is generated, when the option is enabled. +\\ +\end{OptionTableNoShorthand} + + +\begin{OptionTableNoShorthand}{Content light level info SEI message encoder parameters}{tab:sei-content-light-level} +\Option{SEICLLEnabled} & +\Default{false} & +Enables or disables the insertion of the content light level SEI message. 
+\\ +\Option{SEICLLMaxContentLightLevel} & +\Default{4000} & +When not equal to 0, specifies an upper bound on the maximum light level among all individual samples in a 4:4:4 representation of red, green, and blue colour primary intensities in the linear light domain for the pictures of the CLVS, in units of candelas per square metre. When equal to 0, no such upper bound is indicated. +\\ +\Option{SEICLLMaxPicAvgLightLevel} & +\Default{0} & +When not equal to 0, specifies an upper bound on the maximum average light level among the samples in a 4:4:4 representation of red, green, and blue colour primary intensities in the linear light domain for any individual picture of the CLVS, in units of candelas per square metre. When equal to 0, no such upper bound is indicated. +\\ +\end{OptionTableNoShorthand} + + + +\begin{OptionTableNoShorthand}{Alternative transfer characteristics SEI message encoder parameters}{tab:sei-alternative-transfer-characteristics} +\Option{SEIPreferredTransferCharacteristics} & +\Default{18} & +Indicates a preferred alternative value for the transfer_characteristics syntax element that is indicated by the colour description syntax of VUI parameters. +\\ +\end{OptionTableNoShorthand} + + + +\begin{OptionTableNoShorthand}{Ambient viewing environment SEI message encoder parameters}{tab:sei-ambient-viewing-environment} +\Option{SEIAVEEnabled} & +\Default{false} & +Enables or disables the insertion of the ambient viewing environment SEI message. +\\ +\Option{SEIAVEAmbientIlluminance} & +\Default{100000} & +Specifies the environmental illuminance of the ambient viewing environment in units of 1/10000 lux. The value shall not be 0. +\\ +\Option{SEIAVEAmbientLightX} & +\Default{15635} & +Specifies the x chromaticity coordinate, according to the CIE 1931 definition, of the environmental ambient light in the nominal viewing environment in normalized increments of 1/50000. The value shall be in the range of 0 to 50,000, inclusive. 
+\\ +\Option{SEIAVEAmbientLightY} & +\Default{16450} & +Specifies the y chromaticity coordinate, according to the CIE 1931 definition, of the environmental ambient light in the nominal viewing environment in normalized increments of 1/50000. The value shall be in the range of 0 to 50,000, inclusive. +\\ +\end{OptionTableNoShorthand} + + + +\begin{OptionTableNoShorthand}{Content colour volume SEI message encoder parameters}{tab:sei-content-colour-volume} +\Option{SEICCVEnabled} & +\Default{false} & +Enables or disables the insertion of the content colour volume SEI message. +\\ +\Option{SEICCVCancelFlag} & +\Default{0} & +Specifies the persistence of any previous content colour volume SEI message in output order. +\\ +\Option{SEICCVPersistenceFlag} & +\Default{1} & +Specifies the persistence of the content colour volume SEI message for the current layer. +\\ +\Option{SEICCVPrimariesPresent} & +\Default{1} & +Specifies whether the CCV primaries are present in the content colour volume SEI message. +\\ +\Option{m_ccvSEIPrimariesX0} & +\Default{0.300} & +Specifies the x coordinate, according to the CIE 1931 definition, of the first (green) colour primary component in normalized increments of 1/50000. +\\ +\Option{m_ccvSEIPrimariesY0} & +\Default{0.600} & +Specifies the y coordinate, according to the CIE 1931 definition, of the first (green) colour primary component in normalized increments of 1/50000. +\\ +\Option{m_ccvSEIPrimariesX1} & +\Default{0.150} & +Specifies the x coordinate, according to the CIE 1931 definition, of the second (blue) colour primary component in normalized increments of 1/50000. +\\ +\Option{m_ccvSEIPrimariesY1} & +\Default{0.060} & +Specifies the y coordinate, according to the CIE 1931 definition, of the second (blue) colour primary component in normalized increments of 1/50000. 
+\\ +\Option{m_ccvSEIPrimariesX2} & +\Default{0.640} & +Specifies the x coordinate, according to the CIE 1931 definition, of the third (red) colour primary component in normalized increments of 1/50000. +\\ +\Option{m_ccvSEIPrimariesY2} & +\Default{0.330} & +Specifies the y coordinate, according to the CIE 1931 definition, of the third (red) colour primary component in normalized increments of 1/50000. +\\ +\Option{SEICCVMinLuminanceValuePresent} & +\Default{1} & +Specifies whether the CCV min luminance value is present in the content colour volume SEI message. +\\ +\Option{SEICCVMinLuminanceValue} & +\Default{0.0} & +Specifies the CCV min luminance value in the content colour volume SEI message. +\\ +\Option{SEICCVMaxLuminanceValuePresent} & +\Default{1} & +Specifies whether the CCV max luminance value is present in the content colour volume SEI message. +\\ +\Option{SEICCVMaxLuminanceValue} & +\Default{0.1} & +Specifies the CCV max luminance value in the content colour volume SEI message. +\\ +\Option{SEICCVAvgLuminanceValuePresent} & +\Default{1} & +Specifies whether the CCV avg luminance value is present in the content colour volume SEI message. +\\ +\Option{SEICCVAvgLuminanceValue} & +\Default{0.01} & +Specifies the CCV avg luminance value in the content colour volume SEI message. +\\ +\end{OptionTableNoShorthand} + + %\Option{SEITimeCode} & @@ -3054,7 +3891,7 @@ Numerous constants that guard individual adoptions are defined within \section{Using the decoder} \subsection{General} \begin{minted}{bash} -TAppDecoder -b str.bin -o dec.yuv [options] +DecoderApp -b str.bin -o dec.yuv [options] \end{minted} \begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-options} @@ -3073,7 +3910,7 @@ Defines the input bit stream file name. \Option{ReconFile (-o)} & %\ShortOption{-o} & \Default{\NotSet} & -Defines reconstructed YUV file name. If empty, no file is generated. +Defines the reconstructed video file name. If empty, no file is generated. 
If the bitstream contains multiple layer and no single target layer is specified (i.e. TargetLayer=-1), a reconstructed file is written for each layer and the layer index is added as suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'decoded.yuv' becomes 'decoded0.yuv' for layer id 0, 'decoded' becomes 'decoded0'. \\ \Option{SkipFrames (-s)} & @@ -3109,6 +3946,13 @@ Defines the chroma bit-depth of the reconstructed YUV file (the value 0 indicate that the native bit-depth is used) \\ +\Option{TargetLayer (-p)} & +%\ShortOption{-p} & +\Default{-1 \\ (Native)} & +Specifies the target bitstream layer to be decoded (the value -1 indicates +that the whole bitstream is decoded). +\\ + \Option{SEIDecodedPictureHash} & %\ShortOption{\None} & \Default{1} & @@ -3153,17 +3997,6 @@ Specifies that the colour remapping SEI message should be applied to the output If no value is specified, the SEI message is ignored and no mapping is applied. \\ -\Option{RespectDefDispWindow (-w)} & -%\ShortOption{-w} & -\Default{0} & -Video region to be output by the decoder. -\par -\begin{tabular}{cp{0.45\textwidth}} - 0 & Output content inside the conformance window. \\ - 1 & Output content inside the default window. \\ -\end{tabular} -\\ - \Option{OutputColourSpaceConvert} & \Default{\NotSet} & Specifies the colour space conversion to apply to 444 video. Permitted values are: @@ -3176,6 +4009,11 @@ Specifies the colour space conversion to apply to 444 video. Permitted values ar If no value is specified, no colour space conversion is applied. The list may eventually also include RGB to YCbCr or YCgCo conversions.\\ \\ +\Option{PYUV} & +\Default{false} & +When true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. See doc/pyuv_format.pdf for details. Ignored for interlaced output. 
+\\ + \Option{SEINoDisplay} & \Default{false} & When true, do not output frames for which there is an SEI NoDisplay message. @@ -3417,4 +4255,28 @@ DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, \end{minted} \end{description} + + + + +\section{Using the stream merge tool} +\label{sec:stream-merge-tool} + +The StreamMergeApp tool takes multiple single-layer (single nuh_layer_id) bitstreams +as inputs and merges them into a multi-layer bitstream by interleaving the NALUs +from the input single-layer bitstreams. During the merge, the tool assigns a new unique +nuh_layer_id to each input bitstream. The decoder can then select which layer +is to be decoded through the command line option "-p nuh_layer_id". + +\subsection{Usage} +\label{sec:stream-merge-usage} + +\begin{minted}{bash} +StreamMergeApp <bitstream1> <bitstream2> [<bitstream3> ...] <outfile> +\end{minted} + +The command line arguments bitstreamX specify the file names of the input single-layer +bitstreams. At least two input bitstreams need to be specified. The merged multi-layer +bitstream will be stored in the outfile. + \end{document} diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp index 7735309afc36ac6d4e62a2e33522f93f757c0f1a..7d9adcb83ad87c01360e65189c64c0884c574f86 100644 --- a/source/App/DecoderApp/DecApp.cpp +++ b/source/App/DecoderApp/DecApp.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -111,77 +111,76 @@ uint32_t DecApp::decode() } // main decoder loop - bool openedReconFile = false; // reconstruction file not yet opened. 
(must be performed after SPS is seen) bool loopFiltered = false; + bool bPicSkipped = false; + while (!!bitstreamFile) { - /* location serves to work around a design fault in the decoder, whereby - * the process of reading a new slice that is the first slice of a new frame - * requires the DecApp::decode() method to be called again with the same - * nal unit. */ -#if RExt__DECODER_DEBUG_STATISTICS - CodingStatistics& stat = CodingStatistics::GetSingletonInstance(); - CHECK(m_statMode < STATS__MODE_NONE || m_statMode > STATS__MODE_ALL, "Wrong coding statistics output mode"); - stat.m_mode = m_statMode; - - CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics()); -#endif - -#if RExt__DECODER_DEBUG_BIT_STATISTICS - streampos location = bitstreamFile.tellg() - streampos(bytestream.GetNumBufferedBytes()); -#else - streampos location = bitstreamFile.tellg(); -#endif - AnnexBStats stats = AnnexBStats(); - InputNALUnit nalu; - byteStreamNALUnit(bytestream, nalu.getBitstream().getFifo(), stats); - - // call actual decoding function - bool bNewPicture = false; - if (nalu.getBitstream().getFifo().empty()) - { - /* this can happen if the following occur: - * - empty input file - * - two back-to-back start_code_prefixes - * - start_code_prefix immediately followed by EOF - */ - msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n"); - } - else - { - read(nalu); - - if( (m_iMaxTemporalLayer >= 0 && nalu.m_temporalId > m_iMaxTemporalLayer) || !isNaluWithinTargetDecLayerIdSet(&nalu) ) + nalu.m_nalUnitType = NAL_UNIT_INVALID; + + // determine if next NAL unit will be the first one from a new picture + bool bNewPicture = isNewPicture(&bitstreamFile, &bytestream); + bool bNewAccessUnit = bNewPicture && isNewAccessUnit( bNewPicture, &bitstreamFile, &bytestream ); + if(!bNewPicture) + { + AnnexBStats stats = AnnexBStats(); + + // find next NAL unit in stream + byteStreamNALUnit(bytestream, 
nalu.getBitstream().getFifo(), stats); + if (nalu.getBitstream().getFifo().empty()) { - bNewPicture = false; + /* this can happen if the following occur: + * - empty input file + * - two back-to-back start_code_prefixes + * - start_code_prefix immediately followed by EOF + */ + msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n"); } else { - bNewPicture = m_cDecLib.decode(nalu, m_iSkipFrame, m_iPOCLastDisplay); - if (bNewPicture) + // read NAL unit header + read(nalu); + + // flush output for first slice of an IDR picture + if(m_cDecLib.getFirstSliceInPicture() && + (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL || + nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP)) { - bitstreamFile.clear(); - /* location points to the current nalunit payload[1] due to the - * need for the annexB parser to read three extra bytes. - * [1] except for the first NAL unit in the file - * (but bNewPicture doesn't happen then) */ -#if RExt__DECODER_DEBUG_BIT_STATISTICS - bitstreamFile.seekg(location); - bytestream.reset(); - CodingStatistics::SetStatistics(*backupStats); -#else - bitstreamFile.seekg(location-streamoff(3)); - bytestream.reset(); -#endif + xFlushOutput(pcListPic, nalu.m_nuhLayerId); + } + + // parse NAL unit syntax if within target decoding layer + if ((m_iMaxTemporalLayer < 0 || nalu.m_temporalId <= m_iMaxTemporalLayer) && isNaluWithinTargetDecLayerIdSet(&nalu)) + { + if (bPicSkipped) + { + if ((nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR)) + { + if (m_cDecLib.isSliceNaluFirstInAU(true, nalu)) + { + m_cDecLib.resetAccessUnitNals(); + m_cDecLib.resetAccessUnitApsNals(); + } + 
bPicSkipped = false; + } + } + m_cDecLib.decode(nalu, m_iSkipFrame, m_iPOCLastDisplay); + if (nalu.m_nalUnitType == NAL_UNIT_VPS) + { + deriveOutputLayerSet(); + } + } + else + { + bPicSkipped = true; } } } - - if( ( bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS ) && !m_cDecLib.getFirstSliceInSequence() ) + if ((bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS) && !m_cDecLib.getFirstSliceInSequence() && !bPicSkipped) { if (!loopFiltered || bitstreamFile) { @@ -203,7 +202,7 @@ uint32_t DecApp::decode() if( pcListPic ) { - if ( (!m_reconFileName.empty()) && (!openedReconFile) ) + if( !m_reconFileName.empty() && !m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].isOpen() ) { const BitDepths &bitDepths=pcListPic->front()->cs->sps->getBitDepths(); // use bit depths of first reconstructed picture. for( uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++ ) @@ -219,51 +218,46 @@ uint32_t DecApp::decode() EXIT ("Invalid output bit-depth for packed YUV output, aborting\n"); } - m_cVideoIOYuvReconFile.open( m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode - openedReconFile = true; + std::string reconFileName = m_reconFileName; + if (m_reconFileName.compare("/dev/null") && (m_cDecLib.getVPS() != nullptr) && (m_cDecLib.getVPS()->getMaxLayers() > 1) && (isNaluWithinTargetOutputLayerIdSet(&nalu))) + { + size_t pos = reconFileName.find_last_of('.'); + if (pos != string::npos) + { + reconFileName.insert( pos, std::to_string( nalu.m_nuhLayerId ) ); + } + else + { + reconFileName.append( std::to_string( nalu.m_nuhLayerId ) ); + } + } + if(((m_cDecLib.getVPS() != nullptr) && + ((m_cDecLib.getVPS()->getMaxLayers() == 1) || (isNaluWithinTargetOutputLayerIdSet(&nalu)))) || + (m_cDecLib.getVPS() == nullptr)) + m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open(reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon); // write mode } // write reconstruction to file if( 
bNewPicture ) { xWriteOutput( pcListPic, nalu.m_temporalId ); } - if ( (bNewPicture || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA) && m_cDecLib.getNoOutputPriorPicsFlag() ) - { - m_cDecLib.checkNoOutputPriorPics( pcListPic ); - m_cDecLib.setNoOutputPriorPicsFlag (false); - } - if ( bNewPicture && -#if !JVET_M0101_HLS - ( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP ) ) -#else - ( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP) ) -#endif - { - xFlushOutput( pcListPic ); - } if (nalu.m_nalUnitType == NAL_UNIT_EOS) { xWriteOutput( pcListPic, nalu.m_temporalId ); m_cDecLib.setFirstSliceInPicture (false); } // write reconstruction to file -- for additional bumping as defined in C.5.2.3 -#if !JVET_M0101_HLS - if(!bNewPicture && nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL_N && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_VCL31) -#else - if (!bNewPicture && nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_VCL15) -#endif + if (!bNewPicture && ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_IRAP_VCL_12) + || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR))) { xWriteOutput( pcListPic, nalu.m_temporalId ); } } -#if RExt__DECODER_DEBUG_STATISTICS - delete backupStats; -#endif + if(bNewAccessUnit) + { + m_cDecLib.resetAccessUnitNals(); + m_cDecLib.resetAccessUnitApsNals(); + } } xFlushOutput( pcListPic ); @@ -285,6 +279,366 @@ uint32_t DecApp::decode() return nRet; } +bool DecApp::deriveOutputLayerSet() +{ + int vps_max_layers_minus1 = m_cDecLib.getVPS()->getMaxLayers() - 1; + int TotalNumOlss = 0; + int each_layer_is_an_ols_flag = 
m_cDecLib.getVPS()->getEachLayerIsAnOlsFlag(); + int ols_mode_idc = m_cDecLib.getVPS()->getOlsModeIdc(); + int num_output_layer_sets_minus1 = m_cDecLib.getVPS()->getNumOutputLayerSets() - 1; + int i = 0, j = 0, k = 0, r = 0; + int* NumOutputLayersInOls; + int* NumLayersInOls; + int** OutputLayerIdInOls; + int** OutputLayerIdx; + int** layerIncludedInOlsFlag; + int** LayerIdInOls; + int** dependencyFlag; + int** RefLayerIdx; + int* NumRefLayers; + + if (vps_max_layers_minus1 == 0) + TotalNumOlss = 1; + else if (each_layer_is_an_ols_flag || ols_mode_idc == 0 || ols_mode_idc == 1) + TotalNumOlss = vps_max_layers_minus1 + 1; + else if (ols_mode_idc == 2) + TotalNumOlss = num_output_layer_sets_minus1 + 1; + + NumOutputLayersInOls = new int[m_cDecLib.getVPS()->getNumOutputLayerSets()]; + NumLayersInOls = new int[m_cDecLib.getVPS()->getNumOutputLayerSets()]; + OutputLayerIdInOls = new int*[TotalNumOlss]; + OutputLayerIdx = new int*[TotalNumOlss]; + layerIncludedInOlsFlag = new int*[TotalNumOlss]; + LayerIdInOls = new int*[TotalNumOlss]; + + for (i = 0; i < TotalNumOlss; i++) + { + OutputLayerIdInOls[i] = new int[vps_max_layers_minus1 + 1]; + OutputLayerIdx[i] = new int[vps_max_layers_minus1 + 1]; + layerIncludedInOlsFlag[i] = new int[vps_max_layers_minus1 + 1]; + LayerIdInOls[i] = new int[vps_max_layers_minus1 + 1]; + } + + dependencyFlag = new int*[vps_max_layers_minus1 + 1]; + RefLayerIdx = new int*[vps_max_layers_minus1 + 1]; + NumRefLayers = new int[vps_max_layers_minus1 + 1]; + + for (i = 0; i <= vps_max_layers_minus1; i++) + { + dependencyFlag[i] = new int[vps_max_layers_minus1 + 1]; + RefLayerIdx[i] = new int[vps_max_layers_minus1 + 1]; + } + + for (i = 0; i <= vps_max_layers_minus1; i++) { + for (j = 0; j <= vps_max_layers_minus1; j++) { + dependencyFlag[i][j] = m_cDecLib.getVPS()->getDirectRefLayerFlag(i, j); + for (k = 0; k < i; k++) + if (m_cDecLib.getVPS()->getDirectRefLayerFlag(i, k) && dependencyFlag[k][j]) + dependencyFlag[i][j] = 1; + } + } + for (i = 0; i 
<= vps_max_layers_minus1; i++) + { + for (j = 0, r = 0; j <= vps_max_layers_minus1; j++) + { + if (dependencyFlag[i][j]) + RefLayerIdx[i][r++] = j; + } + NumRefLayers[i] = r; + } + + NumOutputLayersInOls[0] = 1; + OutputLayerIdInOls[0][0] = m_cDecLib.getVPS()->getLayerId(0); + for (i = 1; i < TotalNumOlss; i++) + { + if (each_layer_is_an_ols_flag || ols_mode_idc == 0) + { + NumOutputLayersInOls[i] = 1; + OutputLayerIdInOls[i][0] = m_cDecLib.getVPS()->getLayerId(i); + } + else if (ols_mode_idc == 1) { + NumOutputLayersInOls[i] = i + 1; + for (j = 0; j < NumOutputLayersInOls[i]; j++) + OutputLayerIdInOls[i][j] = m_cDecLib.getVPS()->getLayerId(j); + } + else if (ols_mode_idc == 2) { + for (j = 0; j <= vps_max_layers_minus1; j++) + { + layerIncludedInOlsFlag[i][j] = 0; + } + for (k = 0, j = 0; k <= vps_max_layers_minus1; k++) + { + if (m_cDecLib.getVPS()->getOlsOutputLayerFlag(i, k)) + { + layerIncludedInOlsFlag[i][k] = 1; + OutputLayerIdx[i][j] = k; + OutputLayerIdInOls[i][j++] = m_cDecLib.getVPS()->getLayerId(k); + } + } + NumOutputLayersInOls[i] = j; + for (j = 0; j < NumOutputLayersInOls[i]; j++) + { + int idx = OutputLayerIdx[i][j]; + for (k = 0; k < NumRefLayers[idx]; k++) + layerIncludedInOlsFlag[i][RefLayerIdx[idx][k]] = 1; + } + } + } + + m_targetOutputLayerIdSet.clear(); + for (i = 0; i < NumOutputLayersInOls[m_iTargetOLS]; i++) + m_targetOutputLayerIdSet.push_back(OutputLayerIdInOls[m_iTargetOLS][i]); + + NumLayersInOls[0] = 1; + LayerIdInOls[0][0] = m_cDecLib.getVPS()->getLayerId(0); + for (i = 1; i < TotalNumOlss; i++) + { + if (each_layer_is_an_ols_flag) + { + NumLayersInOls[i] = 1; + LayerIdInOls[i][0] = m_cDecLib.getVPS()->getLayerId(i); + } + else if (ols_mode_idc == 0 || ols_mode_idc == 1) + { + NumLayersInOls[i] = i + 1; + for (j = 0; j < NumLayersInOls[i]; j++) + LayerIdInOls[i][j] = m_cDecLib.getVPS()->getLayerId(j); + } + else if (ols_mode_idc == 2) + { + for (k = 0, j = 0; k <= vps_max_layers_minus1; k++) + if (layerIncludedInOlsFlag[i][k]) + 
LayerIdInOls[i][j++] = m_cDecLib.getVPS()->getLayerId(k); + NumLayersInOls[i] = j; + } + } + + m_targetDecLayerIdSet.clear(); + for (i = 0; i < NumLayersInOls[m_iTargetOLS]; i++) + m_targetDecLayerIdSet.push_back(LayerIdInOls[m_iTargetOLS][i]); + + delete[] NumOutputLayersInOls; + delete[] NumLayersInOls; + delete[] NumRefLayers; + + for (i = 0; i < TotalNumOlss; i++) + { + delete[] OutputLayerIdInOls[i]; + delete[] OutputLayerIdx[i]; + delete[] layerIncludedInOlsFlag[i]; + delete[] LayerIdInOls[i]; + } + delete[] OutputLayerIdInOls; + delete[] OutputLayerIdx; + delete[] layerIncludedInOlsFlag; + delete[] LayerIdInOls; + + for (i = 0; i <= vps_max_layers_minus1; i++) + { + delete[] dependencyFlag[i]; + delete[] RefLayerIdx[i]; + } + delete[] dependencyFlag; + delete[] RefLayerIdx; + + return true; +} + +/** + - lookahead through next NAL units to determine if current NAL unit is the first NAL unit in a new picture + */ +bool DecApp::isNewPicture(ifstream *bitstreamFile, class InputByteStream *bytestream) +{ + bool ret = false; + bool finished = false; + + // cannot be a new picture if there haven't been any slices yet + if(m_cDecLib.getFirstSliceInPicture()) + { + return false; + } + + // save stream position for backup +#if RExt__DECODER_DEBUG_STATISTICS + CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics()); + streampos location = bitstreamFile->tellg() - streampos(bytestream->GetNumBufferedBytes()); +#else + streampos location = bitstreamFile->tellg(); +#endif + + // look ahead until picture start location is determined + while (!finished && !!(*bitstreamFile)) + { + AnnexBStats stats = AnnexBStats(); + InputNALUnit nalu; + byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats); + if (nalu.getBitstream().getFifo().empty()) + { + msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n"); + } + else + { + // get next NAL unit type + read(nalu); + switch( 
nalu.m_nalUnitType ) { + + // NUT that indicate the start of a new picture + case NAL_UNIT_ACCESS_UNIT_DELIMITER: + case NAL_UNIT_DPS: + case NAL_UNIT_VPS: + case NAL_UNIT_SPS: + case NAL_UNIT_PPS: + case NAL_UNIT_PH: + ret = true; + finished = true; + break; + + // NUT that are not the start of a new picture + case NAL_UNIT_CODED_SLICE_TRAIL: + case NAL_UNIT_CODED_SLICE_STSA: + case NAL_UNIT_CODED_SLICE_RASL: + case NAL_UNIT_CODED_SLICE_RADL: + case NAL_UNIT_RESERVED_VCL_4: + case NAL_UNIT_RESERVED_VCL_5: + case NAL_UNIT_RESERVED_VCL_6: + case NAL_UNIT_CODED_SLICE_IDR_W_RADL: + case NAL_UNIT_CODED_SLICE_IDR_N_LP: + case NAL_UNIT_CODED_SLICE_CRA: + case NAL_UNIT_CODED_SLICE_GDR: + case NAL_UNIT_RESERVED_IRAP_VCL_11: + case NAL_UNIT_RESERVED_IRAP_VCL_12: + case NAL_UNIT_EOS: + case NAL_UNIT_EOB: + case NAL_UNIT_SUFFIX_APS: + case NAL_UNIT_SUFFIX_SEI: + case NAL_UNIT_FD: + ret = false; + finished = true; + break; + + // NUT that might indicate the start of a new picture - keep looking + case NAL_UNIT_PREFIX_APS: + case NAL_UNIT_PREFIX_SEI: + case NAL_UNIT_RESERVED_NVCL_26: + case NAL_UNIT_RESERVED_NVCL_27: + case NAL_UNIT_UNSPECIFIED_28: + case NAL_UNIT_UNSPECIFIED_29: + case NAL_UNIT_UNSPECIFIED_30: + case NAL_UNIT_UNSPECIFIED_31: + default: + break; + } + } + } + + // restore previous stream location - minus 3 due to the need for the annexB parser to read three extra bytes +#if RExt__DECODER_DEBUG_BIT_STATISTICS + bitstreamFile->clear(); + bitstreamFile->seekg(location); + bytestream->reset(); + CodingStatistics::SetStatistics(*backupStats); + delete backupStats; +#else + bitstreamFile->clear(); + bitstreamFile->seekg(location-streamoff(3)); + bytestream->reset(); +#endif + + // return TRUE if next NAL unit is the start of a new picture + return ret; +} + +/** + - lookahead through next NAL units to determine if current NAL unit is the first NAL unit in a new access unit + */ +bool DecApp::isNewAccessUnit( bool newPicture, ifstream *bitstreamFile, class 
InputByteStream *bytestream ) +{ + bool ret = false; + bool finished = false; + + // can only be the start of an AU if this is the start of a new picture + if( newPicture == false ) + { + return false; + } + + // save stream position for backup +#if RExt__DECODER_DEBUG_STATISTICS + CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics()); + streampos location = bitstreamFile->tellg() - streampos(bytestream->GetNumBufferedBytes()); +#else + streampos location = bitstreamFile->tellg(); +#endif + + // look ahead until access unit start location is determined + while (!finished && !!(*bitstreamFile)) + { + AnnexBStats stats = AnnexBStats(); + InputNALUnit nalu; + byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats); + if (nalu.getBitstream().getFifo().empty()) + { + msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n"); + } + else + { + // get next NAL unit type + read(nalu); + switch( nalu.m_nalUnitType ) { + + // AUD always indicates the start of a new access unit + case NAL_UNIT_ACCESS_UNIT_DELIMITER: + ret = true; + finished = true; + break; + + // slice types - check layer ID and POC + case NAL_UNIT_CODED_SLICE_TRAIL: + case NAL_UNIT_CODED_SLICE_STSA: + case NAL_UNIT_CODED_SLICE_RASL: + case NAL_UNIT_CODED_SLICE_RADL: + case NAL_UNIT_CODED_SLICE_IDR_W_RADL: + case NAL_UNIT_CODED_SLICE_IDR_N_LP: + case NAL_UNIT_CODED_SLICE_CRA: + case NAL_UNIT_CODED_SLICE_GDR: + ret = m_cDecLib.isSliceNaluFirstInAU( newPicture, nalu ); + finished = true; + break; + + // NUT that are not the start of a new access unit + case NAL_UNIT_EOS: + case NAL_UNIT_EOB: + case NAL_UNIT_SUFFIX_APS: + case NAL_UNIT_SUFFIX_SEI: + case NAL_UNIT_FD: + ret = false; + finished = true; + break; + + // all other NUT - keep looking to find first VCL + default: + break; + } + } + } + + // restore previous stream location +#if RExt__DECODER_DEBUG_BIT_STATISTICS + bitstreamFile->clear(); + 
bitstreamFile->seekg(location); + bytestream->reset(); + CodingStatistics::SetStatistics(*backupStats); + delete backupStats; +#else + bitstreamFile->clear(); + bitstreamFile->seekg(location); + bytestream->reset(); +#endif + + // return TRUE if next NAL unit is the start of a new picture + return ret; +} + // ==================================================================================================================== // Protected member functions // ==================================================================================================================== @@ -303,18 +657,24 @@ void DecApp::xCreateDecLib() #endif ); m_cDecLib.setDecodedPictureHashSEIEnabled(m_decodedPictureHashSEIEnabled); + + if (!m_outputDecodedSEIMessagesFilename.empty()) { std::ostream &os=m_seiMessageFileStream.is_open() ? m_seiMessageFileStream : std::cout; m_cDecLib.setDecodedSEIMessageOutputStream(&os); } + m_cDecLib.initScalingList(); } void DecApp::xDestroyDecLib() { - if ( !m_reconFileName.empty() ) + if( !m_reconFileName.empty() ) { - m_cVideoIOYuvReconFile.close(); + for( auto & recFile : m_cVideoIOYuvReconFile ) + { + recFile.second.close(); + } } // destroy decoder class @@ -394,11 +754,11 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) numPicsNotYetDisplayed = numPicsNotYetDisplayed-2; if ( !m_reconFileName.empty() ) { - const Window &conf = pcPicTop->cs->sps->getConformanceWindow(); - const Window defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? 
pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window(); + const Window &conf = pcPicTop->cs->pps->getConformanceWindow(); const bool isTff = pcPicTop->topField; bool display = true; +#if HEVC_SEI if( m_decodedNoDisplaySEIEnabled ) { SEIMessages noDisplay = getSeisByType( pcPic->SEIs, SEI::NO_DISPLAY ); @@ -408,16 +768,17 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) display = false; } } +#endif if (display) { - m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), + m_cVideoIOYuvReconFile[pcPicTop->layerId].write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), m_outputColourSpaceConvert, false, // TODO: m_packedYUVMode, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + conf.getWindowLeftOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowRightOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowTopOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ), NUM_CHROMA_FORMAT, isTff ); } } @@ -460,24 +821,32 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) if (!m_reconFileName.empty()) { - const Window &conf = pcPic->cs->sps->getConformanceWindow(); - const Window defDisp = (m_respectDefDispWindow && pcPic->cs->sps->getVuiParametersPresentFlag()) ? 
pcPic->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window(); - - m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(), + const Window &conf = pcPic->getConformanceWindow(); + const SPS* sps = pcPic->cs->sps; + ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc(); + if( m_upscaledOutput ) + { + m_cVideoIOYuvReconFile[pcPic->layerId].writeUpscaledPicture( *sps, *pcPic->cs->pps, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } + else + { + m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ), NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } } +#if HEVC_SEI if (m_seiMessageFileStream.is_open()) { m_cColourRemapping.outputColourRemapPic (pcPic, m_seiMessageFileStream); } - +#endif // update POC of display order m_iPOCLastDisplay = pcPic->getPOC(); @@ -496,7 +865,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) /** \param pcListPic list of pictures to be written to file */ -void DecApp::xFlushOutput( PicList* pcListPic ) +void DecApp::xFlushOutput( PicList* pcListPic, const int layerId ) { if(!pcListPic || pcListPic->empty()) { @@ -518,22 +887,26 @@ void DecApp::xFlushOutput( PicList* pcListPic ) iterPic++; pcPicBottom = *(iterPic); + if( pcPicTop->layerId != layerId && 
layerId != NOT_VALID ) + { + continue; + } + if ( pcPicTop->neededForOutput && pcPicBottom->neededForOutput && !(pcPicTop->getPOC()%2) && (pcPicBottom->getPOC() == pcPicTop->getPOC()+1) ) { // write to file if ( !m_reconFileName.empty() ) { - const Window &conf = pcPicTop->cs->sps->getConformanceWindow(); - const Window defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window(); + const Window &conf = pcPicTop->cs->pps->getConformanceWindow(); const bool isTff = pcPicTop->topField; - m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), + m_cVideoIOYuvReconFile[pcPicTop->layerId].write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), m_outputColourSpaceConvert, false, // TODO: m_packedYUVMode, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + conf.getWindowLeftOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowRightOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowTopOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ), NUM_CHROMA_FORMAT, isTff ); } @@ -573,29 +946,44 @@ void DecApp::xFlushOutput( PicList* pcListPic ) { pcPic = *(iterPic); + if( pcPic->layerId != layerId && layerId != NOT_VALID ) + { + iterPic++; + continue; + } + if (pcPic->neededForOutput) { // write to file if (!m_reconFileName.empty()) { - const Window &conf = pcPic->cs->sps->getConformanceWindow(); - const Window defDisp = (m_respectDefDispWindow && pcPic->cs->sps->getVuiParametersPresentFlag()) ? 
pcPic->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window(); - - m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(), + const Window &conf = pcPic->getConformanceWindow(); + const SPS* sps = pcPic->cs->sps; + ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc(); + if( m_upscaledOutput ) + { + m_cVideoIOYuvReconFile[pcPic->layerId].writeUpscaledPicture( *sps, *pcPic->cs->pps, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } + else + { + m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ), NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } } +#if HEVC_SEI if (m_seiMessageFileStream.is_open()) { m_cColourRemapping.outputColourRemapPic (pcPic, m_seiMessageFileStream); } +#endif // update POC of display order m_iPOCLastDisplay = pcPic->getPOC(); @@ -612,10 +1000,17 @@ void DecApp::xFlushOutput( PicList* pcListPic ) pcPic->destroy(); delete pcPic; pcPic = NULL; + *iterPic = nullptr; } iterPic++; } } + + if( layerId != NOT_VALID ) + { + pcListPic->remove_if([](Picture* p) { return p == nullptr; }); + } + else pcListPic->clear(); m_iPOCLastDisplay = -MAX_INT; } @@ -638,4 +1033,23 @@ bool DecApp::isNaluWithinTargetDecLayerIdSet( InputNALUnit* nalu ) return false; } +/** \param nalu Input nalu to 
check whether its LayerId is within targetOutputLayerIdSet + */ +bool DecApp::isNaluWithinTargetOutputLayerIdSet(InputNALUnit* nalu) +{ + if (m_targetOutputLayerIdSet.size() == 0) // By default, the set is empty, meaning all LayerIds are allowed + { + return true; + } + for (std::vector<int>::iterator it = m_targetOutputLayerIdSet.begin(); it != m_targetOutputLayerIdSet.end(); it++) + { + if (nalu->m_nuhLayerId == (*it)) + { + return true; + } + } + return false; +} + + //! \} diff --git a/source/App/DecoderApp/DecApp.h b/source/App/DecoderApp/DecApp.h index a0bf8a6fb4b48fdca0c477bf5ffd74738a64fa08..2d5c0fcdae81b11c04f0ca70ac489f23aac9cc86 100644 --- a/source/App/DecoderApp/DecApp.h +++ b/source/App/DecoderApp/DecApp.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,13 +61,14 @@ class DecApp : public DecAppCfg private: // class interface DecLib m_cDecLib; ///< decoder class - VideoIOYuv m_cVideoIOYuvReconFile; ///< reconstruction YUV class + std::unordered_map<int, VideoIOYuv> m_cVideoIOYuvReconFile; ///< reconstruction YUV class // for output control int m_iPOCLastDisplay; ///< last POC in display order std::ofstream m_seiMessageFileStream; ///< Used for outputing SEI messages. 
+#if HEVC_SEI ColourRemapping m_cColourRemapping; ///< colour remapping handler - +#endif public: DecApp(); @@ -79,8 +80,12 @@ private: void xCreateDecLib (); ///< create internal classes void xDestroyDecLib (); ///< destroy internal classes void xWriteOutput ( PicList* pcListPic , uint32_t tId); ///< write YUV to file - void xFlushOutput ( PicList* pcListPic ); ///< flush all remaining decoded pictures to file + void xFlushOutput( PicList* pcListPic, const int layerId = NOT_VALID ); ///< flush all remaining decoded pictures to file bool isNaluWithinTargetDecLayerIdSet ( InputNALUnit* nalu ); ///< check whether given Nalu is within targetDecLayerIdSet + bool isNaluWithinTargetOutputLayerIdSet(InputNALUnit* nalu); ///< check whether given Nalu is within targetOutputLayerIdSet + bool deriveOutputLayerSet(); ///< derive OLS and layer sets + bool isNewPicture(ifstream *bitstreamFile, class InputByteStream *bytestream); ///< check if next NAL unit will be the first NAL unit from a new picture + bool isNewAccessUnit(bool newPicture, ifstream *bitstreamFile, class InputByteStream *bytestream); ///< check if next NAL unit will be the first NAL unit from a new access unit }; //! \} diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp index c058d6fe8c2493163d0dc40596b7654de627442b..b3a3098256b863af1734166d57d1101d9de6687c 100644 --- a/source/App/DecoderApp/DecAppCfg.cpp +++ b/source/App/DecoderApp/DecAppCfg.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -87,12 +87,12 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) ("OutputBitDepthC,d", m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0, "bit depth of YUV output chroma component (default: use luma output bit-depth)") ("OutputColourSpaceConvert", outputColourSpaceConvert, string(""), "Colour space conversion to apply to input 444 video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(false)) ("MaxTemporalLayer,t", m_iMaxTemporalLayer, -1, "Maximum Temporal Layer to be decoded. -1 to decode all layers") + ("TargetOutputLayerSet,p", m_iTargetOLS, -1, "Target output layer set.") ("SEIDecodedPictureHash,-dph",m_decodedPictureHashSEIEnabled, 1, "Control handling of decoded picture hash SEI messages\n" "\t1: check hash in SEI messages if available in the bitstream\n" "\t0: ignore SEI message") ("SEINoDisplay", m_decodedNoDisplaySEIEnabled, true, "Control handling of decoded no display SEI messages") ("TarDecLayerIdSetFile,l", cfg_TargetDecLayerIdSetFile, string(""), "targetDecLayerIdSet file name. The file should include white space separated LayerId values to be decoded. Omitting the option or a value of -1 in the file decodes all layers.") - ("RespectDefDispWindow,w", m_respectDefDispWindow, 0, "Only output content inside the default display window\n") ("SEIColourRemappingInfoFilename", m_colourRemapSEIFileName, string(""), "Colour Remapping YUV output file name. If empty, no remapping is applied (ignore SEI message)\n") ("OutputDecodedSEIMessagesFilename", m_outputDecodedSEIMessagesFilename, string(""), "When non empty, output decoded SEI messages to the indicated file. If file is '-', then output to stdout\n") ("ClipOutputVideoToRec709Range", m_bClipOutputVideoToRec709Range, false, "If true then clip output video to the Rec. 
709 Range on saving") @@ -113,6 +113,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) "\t3: enable bit and tool statistic\n") #endif ("MCTSCheck", m_mctsCheck, false, "If enabled, the decoder checks for violations of mc_exact_sample_value_match_flag in Temporal MCTS ") + ( "UpscaledOutput", m_upscaledOutput, 0, "Upscaled output for RPR" ) ; po::setDefaults(opts); @@ -222,12 +223,12 @@ DecAppCfg::DecAppCfg() , m_iSkipFrame(0) // m_outputBitDepth array initialised below , m_outputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED) +, m_iTargetOLS(0) , m_iMaxTemporalLayer(-1) , m_decodedPictureHashSEIEnabled(0) , m_decodedNoDisplaySEIEnabled(false) , m_colourRemapSEIFileName() , m_targetDecLayerIdSet() -, m_respectDefDispWindow(0) , m_outputDecodedSEIMessagesFilename() , m_bClipOutputVideoToRec709Range(false) , m_packedYUVMode(false) diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h index a27a069284084ccdf974feb3f20c2f835f1291dd..c06ebdd999d3e3082229697826437763b6a2becf 100644 --- a/source/App/DecoderApp/DecAppCfg.h +++ b/source/App/DecoderApp/DecAppCfg.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -61,13 +61,13 @@ protected: int m_iSkipFrame; ///< counter for frames prior to the random access point to skip int m_outputBitDepth[MAX_NUM_CHANNEL_TYPE]; ///< bit depth used for writing output InputColourSpaceConversion m_outputColourSpaceConvert; - + int m_iTargetOLS; ///< target output layer set + std::vector<int> m_targetOutputLayerIdSet; ///< set of LayerIds to be outputted int m_iMaxTemporalLayer; ///< maximum temporal layer to be decoded int m_decodedPictureHashSEIEnabled; ///< Checksum(3)/CRC(2)/MD5(1)/disable(0) acting on decoded picture hash SEI message bool m_decodedNoDisplaySEIEnabled; ///< Enable(true)/disable(false) writing only pictures that get displayed based on the no display SEI message std::string m_colourRemapSEIFileName; ///< output Colour Remapping file name std::vector<int> m_targetDecLayerIdSet; ///< set of LayerIds to be included in the sub-bitstream extraction process. - int m_respectDefDispWindow; ///< Only output content inside the default display window std::string m_outputDecodedSEIMessagesFilename; ///< filename to output decoded SEI messages to. If '-', then use stdout. If empty, do not output details. bool m_bClipOutputVideoToRec709Range; ///< If true, clip the output video to the Rec 709 range on saving. bool m_packedYUVMode; ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data @@ -75,6 +75,8 @@ protected: int m_statMode; ///< Config statistic mode (0 - bit stat, 1 - tool stat, 3 - both) bool m_mctsCheck; + int m_upscaledOutput; ////< Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR. 
+ public: DecAppCfg(); virtual ~DecAppCfg(); diff --git a/source/App/DecoderApp/decmain.cpp b/source/App/DecoderApp/decmain.cpp index aa3c772b8c2b846c1ba5b8048785ae16efae7926..c8a6e3bd7070cdcbd867a95d3f39c27cca6739e1 100644 --- a/source/App/DecoderApp/decmain.cpp +++ b/source/App/DecoderApp/decmain.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -100,9 +100,9 @@ int main(int argc, char* argv[]) std::cerr << e.what() << std::endl; returnCode = EXIT_FAILURE; } - catch( ... ) + catch (const std::bad_alloc &e) { - std::cerr << "Unspecified error occurred" << std::endl; + std::cout << "Memory allocation failed: " << e.what() << std::endl; returnCode = EXIT_FAILURE; } #endif diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt index 2299bcf8f1987839181ad36b53cba3e95c844042..dd87e52d1f8244c607c34d42359dd0595e63cd88 100644 --- a/source/App/EncoderApp/CMakeLists.txt +++ b/source/App/EncoderApp/CMakeLists.txt @@ -66,6 +66,10 @@ if( EXTENSION_360_VIDEO ) target_link_libraries( ${EXE_NAME} Lib360 AppEncHelper360 ) endif() +if( EXTENSION_HDRTOOLS ) + target_link_libraries( ${EXE_NAME} HDRLib ) +endif() + # lldb custom data formatters if( XCODE ) add_dependencies( ${EXE_NAME} Install${PROJECT_NAME}LldbFiles ) diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 9cdd07a6932cce8b0cf2446c00f5b7d20293a51a..77f765178b1b70b789850c6e9c59035a7721233f 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,9 +44,7 @@ #include "EncApp.h" #include "EncoderLib/AnnexBwrite.h" -#if EXTENSION_360_VIDEO -#include "AppEncHelper360/TExt360AppEncTop.h" -#endif +#include "EncoderLib/EncLibCommon.h" using namespace std; @@ -57,11 +55,18 @@ using namespace std; // Constructor / destructor / initialization / destroy // ==================================================================================================================== -EncApp::EncApp() +EncApp::EncApp( fstream& bitStream, EncLibCommon* encLibCommon ) + : m_cEncLib( encLibCommon ) + , m_bitstream( bitStream ) { m_iFrameRcvd = 0; m_totalBytes = 0; m_essentialBytes = 0; +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = std::chrono::milliseconds(0); +#endif + m_numEncoded = 0; + m_flush = false; } EncApp::~EncApp() @@ -70,33 +75,115 @@ EncApp::~EncApp() void EncApp::xInitLibCfg() { -#if HEVC_VPS VPS vps; - vps.setMaxTLayers ( m_maxTempLayer ); - if (m_maxTempLayer == 1) + vps.setMaxLayers( m_maxLayers ); + + if (vps.getMaxLayers() > 1) { - vps.setTemporalNestingFlag(true); + vps.setVPSId(1); //JVET_P0205 vps_video_parameter_set_id shall be greater than 0 for multi-layer coding } - vps.setMaxLayers ( 1 ); - for(int i = 0; i < MAX_TLAYER; i++) + else + { + vps.setVPSId(0); + vps.setEachLayerIsAnOlsFlag(1); // If vps_max_layers_minus1 is equal to 0, + // the value of each_layer_is_an_ols_flag is inferred to be equal to 1. + // Otherwise, when vps_all_independent_layers_flag is equal to 0, + // the value of each_layer_is_an_ols_flag is inferred to be equal to 0. 
+ } + vps.setMaxSubLayers(m_maxSublayers); + if (vps.getMaxLayers() > 1 && vps.getMaxSubLayers() > 1) + { + vps.setAllLayersSameNumSublayersFlag(m_allLayersSameNumSublayersFlag); + } + if (vps.getMaxLayers() > 1) + { + vps.setAllIndependentLayersFlag(m_allIndependentLayersFlag); + if (!vps.getAllIndependentLayersFlag()) + { + vps.setEachLayerIsAnOlsFlag(0); + } + } + + for (int i = 0; i < vps.getMaxLayers(); i++) + { + vps.setGeneralLayerIdx( m_layerId[i], i ); + vps.setLayerId(i, m_layerId[i]); + + if (i > 0 && !vps.getAllIndependentLayersFlag()) + { + vps.setIndependentLayerFlag( i, m_numRefLayers[i] ? false : true ); + + if (!vps.getIndependentLayerFlag(i)) + { + for (int j = 0, k = 0; j < i; j++) + { + if (m_refLayerIdxStr[i].find(to_string(j)) != std::string::npos) + { + vps.setDirectRefLayerFlag(i, j, true); + vps.setInterLayerRefIdc( i, j, k ); + vps.setDirectRefLayerIdx(i, k++, j); + } + else + { + vps.setDirectRefLayerFlag(i, j, false); + } + } + } + } + } + + + if (vps.getMaxLayers() > 1) { - vps.setNumReorderPics ( m_numReorderPics[i], i ); - vps.setMaxDecPicBuffering ( m_maxDecPicBuffering[i], i ); + if (vps.getAllIndependentLayersFlag()) + { + vps.setEachLayerIsAnOlsFlag(m_eachLayerIsAnOlsFlag); + if (vps.getEachLayerIsAnOlsFlag() == 0) + { + vps.setOlsModeIdc(2); // When vps_all_independent_layers_flag is equal to 1 and each_layer_is_an_ols_flag is equal to 0, the value of ols_mode_idc is inferred to be equal to 2 + } + } + if (!vps.getEachLayerIsAnOlsFlag()) + { + if (!vps.getAllIndependentLayersFlag()) + { + vps.setOlsModeIdc(m_olsModeIdc); + } + if (vps.getOlsModeIdc() == 2) + { + vps.setNumOutputLayerSets(m_numOutputLayerSets); + for (int i = 1; i < vps.getNumOutputLayerSets(); i++) + { + for (int j = 0; j < vps.getMaxLayers(); j++) + { + if (m_olsOutputLayerStr[i].find(to_string(j)) != std::string::npos) + { + vps.setOlsOutputLayerFlag(i, j, 1); + } + else + { + vps.setOlsOutputLayerFlag(i, j, 0); + } + } + } + } + } } + vps.setVPSExtensionFlag ( 
false ); m_cEncLib.setVPS(&vps); -#endif m_cEncLib.setProfile ( m_profile); m_cEncLib.setLevel ( m_levelTier, m_level); + m_cEncLib.setNumSubProfile ( m_numSubProfile ); + for (int i = 0; i < m_numSubProfile; i++) + { + m_cEncLib.setSubProfile(i, m_subProfile[i]); + } m_cEncLib.setProgressiveSourceFlag ( m_progressiveSourceFlag); m_cEncLib.setInterlacedSourceFlag ( m_interlacedSourceFlag); m_cEncLib.setNonPackedConstraintFlag ( m_nonPackedConstraintFlag); m_cEncLib.setFrameOnlyConstraintFlag ( m_frameOnlyConstraintFlag); - m_cEncLib.setBitDepthConstraintValue ( m_bitDepthConstraint ); - m_cEncLib.setChromaFormatConstraintValue ( m_chromaFormatConstraint ); m_cEncLib.setIntraConstraintFlag ( m_intraConstraintFlag ); - m_cEncLib.setOnePictureOnlyConstraintFlag ( m_onePictureOnlyConstraintFlag ); - m_cEncLib.setLowerBitRateConstraintFlag ( m_lowerBitRateConstraintFlag ); m_cEncLib.setPrintMSEBasedSequencePSNR ( m_printMSEBasedSequencePSNR); m_cEncLib.setPrintFrameMSE ( m_printFrameMSE); @@ -109,7 +196,11 @@ void EncApp::xInitLibCfg() m_cEncLib.setTemporalSubsampleRatio ( m_temporalSubsampleRatio ); m_cEncLib.setSourceWidth ( m_iSourceWidth ); m_cEncLib.setSourceHeight ( m_iSourceHeight ); - m_cEncLib.setConformanceWindow ( m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom ); + m_cEncLib.setConformanceWindow ( m_confWinLeft / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinRight / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinTop / SPS::getWinUnitY( m_InputChromaFormatIDC ), m_confWinBottom / SPS::getWinUnitY( m_InputChromaFormatIDC ) ); + m_cEncLib.setScalingRatio ( m_scalingRatioHor, m_scalingRatioVer ); + m_cEncLib.setRPREnabled ( m_rprEnabled ); + m_cEncLib.setSwitchPocPeriod ( m_switchPocPeriod ); + m_cEncLib.setUpscaledOutput ( m_upscaledOutput ); m_cEncLib.setFramesToBeEncoded ( m_framesToBeEncoded ); //====== SPS constraint flags ======= @@ -118,35 +209,51 @@ void EncApp::xInitLibCfg() m_cEncLib.setMaxChromaFormatConstraintIdc ( 
m_chromaFormatConstraint ); m_cEncLib.setFrameConstraintFlag ( m_bFrameConstraintFlag ); m_cEncLib.setNoQtbttDualTreeIntraConstraintFlag ( !m_dualTree ); + m_cEncLib.setNoPartitionConstraintsOverrideConstraintFlag ( !m_SplitConsOverrideEnabledFlag ); m_cEncLib.setNoSaoConstraintFlag ( !m_bUseSAO ); m_cEncLib.setNoAlfConstraintFlag ( !m_alf ); - m_cEncLib.setNoPcmConstraintFlag ( !m_usePCM ); m_cEncLib.setNoRefWraparoundConstraintFlag ( m_bNoRefWraparoundConstraintFlag ); m_cEncLib.setNoTemporalMvpConstraintFlag ( m_TMVPModeId ? false : true ); m_cEncLib.setNoSbtmvpConstraintFlag ( m_SubPuMvpMode ? false : true ); m_cEncLib.setNoAmvrConstraintFlag ( m_bNoAmvrConstraintFlag ); m_cEncLib.setNoBdofConstraintFlag ( !m_BIO ); + m_cEncLib.setNoDmvrConstraintFlag ( !m_DMVR ); m_cEncLib.setNoCclmConstraintFlag ( m_LMChroma ? false : true ); m_cEncLib.setNoMtsConstraintFlag ( (m_MTS || m_MTSImplicit) ? false : true ); + m_cEncLib.setNoSbtConstraintFlag ( !m_SBT ); m_cEncLib.setNoAffineMotionConstraintFlag ( !m_Affine ); - m_cEncLib.setNoGbiConstraintFlag ( !m_GBi ); - m_cEncLib.setNoMhIntraConstraintFlag ( !m_MHIntra ); + m_cEncLib.setNoBcwConstraintFlag ( !m_bcw ); + m_cEncLib.setNoIbcConstraintFlag ( m_IBCMode ? 
false : true ); + m_cEncLib.setNoCiipConstraintFlag ( !m_ciip ); + m_cEncLib.setNoFPelMmvdConstraintFlag ( !(m_MMVD && m_allowDisFracMMVD) ); m_cEncLib.setNoTriangleConstraintFlag ( !m_Triangle ); m_cEncLib.setNoLadfConstraintFlag ( !m_LadfEnabed ); - m_cEncLib.setNoCurrPicRefConstraintFlag ( !m_IBCMode ); + m_cEncLib.setNoTransformSkipConstraintFlag ( !m_useTransformSkip ); + m_cEncLib.setNoBDPCMConstraintFlag ( m_useBDPCM == 0 ); + m_cEncLib.setNoJointCbCrConstraintFlag (!m_JointCbCrMode); m_cEncLib.setNoQpDeltaConstraintFlag ( m_bNoQpDeltaConstraintFlag ); m_cEncLib.setNoDepQuantConstraintFlag ( !m_depQuantEnabledFlag); m_cEncLib.setNoSignDataHidingConstraintFlag ( !m_signDataHidingEnabledFlag ); + m_cEncLib.setNoTrailConstraintFlag ( m_iIntraPeriod == 1 ); + m_cEncLib.setNoStsaConstraintFlag ( m_iIntraPeriod == 1 || !xHasNonZeroTemporalID() ); + m_cEncLib.setNoRaslConstraintFlag ( m_iIntraPeriod == 1 || !xHasLeadingPicture() ); + m_cEncLib.setNoRadlConstraintFlag ( m_iIntraPeriod == 1 || !xHasLeadingPicture() ); + m_cEncLib.setNoIdrConstraintFlag ( false ); // Not yet possible to encode bitstream starting with a GDR picture + m_cEncLib.setNoCraConstraintFlag ( m_iDecodingRefreshType != 1 ); + m_cEncLib.setNoGdrConstraintFlag ( false ); // Not yet possible to encode GDR using config parameters + m_cEncLib.setNoApsConstraintFlag ( !m_alf && !m_lmcsEnabled && m_useScalingListId == SCALING_LIST_OFF); //====== Coding Structure ======== m_cEncLib.setIntraPeriod ( m_iIntraPeriod ); m_cEncLib.setDecodingRefreshType ( m_iDecodingRefreshType ); m_cEncLib.setGOPSize ( m_iGOPSize ); -#if JCTVC_Y0038_PARAMS + m_cEncLib.setDrapPeriod ( m_drapPeriod ); m_cEncLib.setReWriteParamSets ( m_rewriteParamSets ); -#endif + m_cEncLib.setRPLList0 ( m_RPLList0); + m_cEncLib.setRPLList1 ( m_RPLList1); + m_cEncLib.setIDRRefParamListPresent ( m_idrRefParamList ); m_cEncLib.setGopList ( m_GOPList ); - m_cEncLib.setExtraRPSs ( m_extraRPSs ); + for(int i = 0; i < MAX_TLAYER; i++) { 
m_cEncLib.setNumReorderPics ( m_numReorderPics[i], i ); @@ -165,6 +272,8 @@ void EncApp::xInitLibCfg() m_cEncLib.setIntraQPOffset ( m_intraQPOffset ); m_cEncLib.setLambdaFromQPEnable ( m_lambdaFromQPEnable ); #endif + m_cEncLib.setChromaQpMappingTableParams (m_chromaQpMappingTableParams); + m_cEncLib.setPad ( m_aiPad ); m_cEncLib.setAccessUnitDelimiter ( m_AccessUnitDelimiter ); @@ -202,6 +311,8 @@ void EncApp::xInitLibCfg() m_cEncLib.setChromaCrQpOffset ( m_crQpOffset ); m_cEncLib.setChromaCbQpOffsetDualTree ( m_cbQpOffsetDualTree ); m_cEncLib.setChromaCrQpOffsetDualTree ( m_crQpOffsetDualTree ); + m_cEncLib.setChromaCbCrQpOffset ( m_cbCrQpOffset ); + m_cEncLib.setChromaCbCrQpOffsetDualTree ( m_cbCrQpOffsetDualTree ); #if ER_CHROMA_QP_WCG_PPS m_cEncLib.setWCGChromaQpControl ( m_wcgChromaQpControl ); #endif @@ -240,25 +351,58 @@ void EncApp::xInitLibCfg() #endif m_cEncLib.setRDpenalty ( m_rdPenalty ); m_cEncLib.setCTUSize ( m_uiCTUSize ); + m_cEncLib.setSubPicPresentFlag ( m_subPicPresentFlag ); + if(m_subPicPresentFlag) + { + m_cEncLib.setNumSubPics ( m_numSubPics ); + for (int i = 0; i < m_numSubPics; i++) + { + m_cEncLib.setSubPicCtuTopLeftX ( m_subPicCtuTopLeftX[i], i ); + m_cEncLib.setSubPicCtuTopLeftY ( m_subPicCtuTopLeftY[i], i ); + m_cEncLib.setSubPicWidth ( m_subPicWidth[i], i ); + m_cEncLib.setSubPicHeight ( m_subPicHeight[i], i ); + m_cEncLib.setSubPicTreatedAsPicFlag ( m_subPicTreatedAsPicFlag[i], i ); + m_cEncLib.setLoopFilterAcrossSubpicEnabledFlag ( m_loopFilterAcrossSubpicEnabledFlag[i], i ); + } + } + m_cEncLib.setSubPicIdPresentFlag ( m_subPicIdPresentFlag ); + if (m_subPicIdPresentFlag) + { + m_cEncLib.setSubPicIdSignallingPresentFlag ( m_subPicIdSignallingPresentFlag ); + if(m_subPicIdSignallingPresentFlag) + { + m_cEncLib.setSubPicIdLen ( m_subPicIdLen ); + for (int i = 0; i < m_numSubPics; i++) + { + m_cEncLib.setSubPicId ( m_subPicId[i], i ); + } + } + } m_cEncLib.setUseSplitConsOverride ( m_SplitConsOverrideEnabledFlag ); 
m_cEncLib.setMinQTSizes ( m_uiMinQT ); - m_cEncLib.setMaxBTDepth ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma ); + m_cEncLib.setMaxMTTHierarchyDepth ( m_uiMaxMTTHierarchyDepth, m_uiMaxMTTHierarchyDepthI, m_uiMaxMTTHierarchyDepthIChroma ); m_cEncLib.setDualITree ( m_dualTree ); + m_cEncLib.setLFNST ( m_LFNST ); + m_cEncLib.setUseFastLFNST ( m_useFastLFNST ); m_cEncLib.setSubPuMvpMode ( m_SubPuMvpMode ); m_cEncLib.setAffine ( m_Affine ); m_cEncLib.setAffineType ( m_AffineType ); + m_cEncLib.setPROF ( m_PROF ); m_cEncLib.setBIO (m_BIO); m_cEncLib.setUseLMChroma ( m_LMChroma ); - m_cEncLib.setCclmCollocatedChromaFlag ( m_cclmCollocatedChromaFlag ); + m_cEncLib.setHorCollocatedChromaFlag ( m_horCollocatedChromaFlag ); + m_cEncLib.setVerCollocatedChromaFlag ( m_verCollocatedChromaFlag ); m_cEncLib.setIntraMTS ( m_MTS & 1 ); - m_cEncLib.setIntraMTSMaxCand ( m_MTSIntraMaxCand ); m_cEncLib.setInterMTS ( ( m_MTS >> 1 ) & 1 ); - m_cEncLib.setInterMTSMaxCand ( m_MTSInterMaxCand ); + m_cEncLib.setMTSIntraMaxCand ( m_MTSIntraMaxCand ); + m_cEncLib.setMTSInterMaxCand ( m_MTSInterMaxCand ); m_cEncLib.setImplicitMTS ( m_MTSImplicit ); m_cEncLib.setUseSBT ( m_SBT ); + m_cEncLib.setSBTFast64WidthTh ( m_SBTFast64WidthTh ); m_cEncLib.setUseCompositeRef ( m_compositeRefEnabled ); - m_cEncLib.setUseGBi ( m_GBi ); - m_cEncLib.setUseGBiFast ( m_GBiFast ); + m_cEncLib.setUseSMVD ( m_SMVD ); + m_cEncLib.setUseBcw ( m_bcw ); + m_cEncLib.setUseBcwFast ( m_BcwFast ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET m_cEncLib.setUseLadf ( m_LadfEnabed ); if ( m_LadfEnabed ) @@ -271,7 +415,7 @@ void EncApp::xInitLibCfg() } } #endif - m_cEncLib.setUseMHIntra ( m_MHIntra ); + m_cEncLib.setUseCiip ( m_ciip ); m_cEncLib.setUseTriangle ( m_Triangle ); m_cEncLib.setUseHashME ( m_HashME ); @@ -279,6 +423,12 @@ void EncApp::xInitLibCfg() m_cEncLib.setUseAffineAmvr ( m_AffineAmvr ); m_cEncLib.setUseAffineAmvrEncOpt ( m_AffineAmvrEncOpt ); m_cEncLib.setDMVR ( m_DMVR ); + m_cEncLib.setMMVD ( m_MMVD 
); + m_cEncLib.setMmvdDisNum (m_MmvdDisNum); + m_cEncLib.setRGBFormatFlag(m_rgbFormat); + m_cEncLib.setUseColorTrans(m_useColorTrans); + m_cEncLib.setPLTMode ( m_PLTMode ); + m_cEncLib.setJointCbCr ( m_JointCbCrMode ); m_cEncLib.setIBCMode ( m_IBCMode ); m_cEncLib.setIBCLocalSearchRangeX ( m_IBCLocalSearchRangeX ); m_cEncLib.setIBCLocalSearchRangeY ( m_IBCLocalSearchRangeY ); @@ -292,13 +442,23 @@ void EncApp::xInitLibCfg() // ADD_NEW_TOOL : (encoder app) add setting of tool enabling flags and associated parameters here + m_cEncLib.setLoopFilterAcrossVirtualBoundariesDisabledFlag ( m_loopFilterAcrossVirtualBoundariesDisabledFlag ); + m_cEncLib.setNumVerVirtualBoundaries ( m_numVerVirtualBoundaries ); + m_cEncLib.setNumHorVirtualBoundaries ( m_numHorVirtualBoundaries ); + for( unsigned i = 0; i < m_numVerVirtualBoundaries; i++ ) + { + m_cEncLib.setVirtualBoundariesPosX ( m_virtualBoundariesPosX[ i ], i ); + } + for( unsigned i = 0; i < m_numHorVirtualBoundaries; i++ ) + { + m_cEncLib.setVirtualBoundariesPosY ( m_virtualBoundariesPosY[ i ], i ); + } + m_cEncLib.setMaxCUWidth ( m_uiCTUSize ); m_cEncLib.setMaxCUHeight ( m_uiCTUSize ); m_cEncLib.setMaxCodingDepth ( m_uiMaxCodingDepth ); m_cEncLib.setLog2DiffMaxMinCodingBlockSize ( m_uiLog2DiffMaxMinCodingBlockSize ); -#if MAX_TB_SIZE_SIGNALLING m_cEncLib.setLog2MaxTbSize ( m_log2MaxTbSize ); -#endif m_cEncLib.setUseEncDbOpt(m_encDbOpt); m_cEncLib.setUseFastLCTU ( m_useFastLCTU ); m_cEncLib.setFastInterSearchMode ( m_fastInterSearchMode ); @@ -311,12 +471,21 @@ void EncApp::xInitLibCfg() m_cEncLib.setUseAMaxBT ( m_useAMaxBT ); m_cEncLib.setUseE0023FastEnc ( m_e0023FastEnc ); m_cEncLib.setUseContentBasedFastQtbt ( m_contentBasedFastQtbt ); + m_cEncLib.setUseNonLinearAlfLuma ( m_useNonLinearAlfLuma ); + m_cEncLib.setUseNonLinearAlfChroma ( m_useNonLinearAlfChroma ); + m_cEncLib.setMaxNumAlfAlternativesChroma ( m_maxNumAlfAlternativesChroma ); + m_cEncLib.setUseMRL ( m_MRL ); + m_cEncLib.setUseMIP ( m_MIP ); + 
m_cEncLib.setUseFastMIP ( m_useFastMIP ); + m_cEncLib.setFastLocalDualTreeMode ( m_fastLocalDualTreeMode ); m_cEncLib.setCrossComponentPredictionEnabledFlag ( m_crossComponentPredictionEnabledFlag ); m_cEncLib.setUseReconBasedCrossCPredictionEstimate ( m_reconBasedCrossCPredictionEstimate ); m_cEncLib.setLog2SaoOffsetScale ( CHANNEL_TYPE_LUMA , m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA] ); m_cEncLib.setLog2SaoOffsetScale ( CHANNEL_TYPE_CHROMA, m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] ); m_cEncLib.setUseTransformSkip ( m_useTransformSkip ); m_cEncLib.setUseTransformSkipFast ( m_useTransformSkipFast ); + m_cEncLib.setUseChromaTS ( m_useChromaTS && m_useTransformSkip); + m_cEncLib.setUseBDPCM ( m_useBDPCM ); m_cEncLib.setTransformSkipRotationEnabledFlag ( m_transformSkipRotationEnabledFlag ); m_cEncLib.setTransformSkipContextEnabledFlag ( m_transformSkipContextEnabledFlag ); m_cEncLib.setPersistentRiceAdaptationEnabledFlag ( m_persistentRiceAdaptationEnabledFlag ); @@ -326,12 +495,10 @@ void EncApp::xInitLibCfg() { m_cEncLib.setRdpcmEnabledFlag ( RDPCMSignallingMode(signallingModeIndex), m_rdpcmEnabledFlag[signallingModeIndex]); } - m_cEncLib.setUseConstrainedIntraPred ( m_bUseConstrainedIntraPred ); m_cEncLib.setFastUDIUseMPMEnabled ( m_bFastUDIUseMPMEnabled ); m_cEncLib.setFastMEForGenBLowDelayEnabled ( m_bFastMEForGenBLowDelayEnabled ); m_cEncLib.setUseBLambdaForNonKeyLowDelayPictures ( m_bUseBLambdaForNonKeyLowDelayPictures ); - m_cEncLib.setPCMLog2MinSize ( m_uiPCMLog2MinSize); - m_cEncLib.setUsePCM ( m_usePCM ); + m_cEncLib.setUseISP ( m_ISP ); m_cEncLib.setUseFastISP ( m_useFastISP ); // set internal bit-depth and constants @@ -339,35 +506,42 @@ void EncApp::xInitLibCfg() { m_cEncLib.setBitDepth((ChannelType)channelType, m_internalBitDepth[channelType]); m_cEncLib.setInputBitDepth((ChannelType)channelType, m_inputBitDepth[channelType]); - m_cEncLib.setPCMBitDepth((ChannelType)channelType, m_bPCMInputBitDepthFlag ? 
m_MSBExtendedBitDepth[channelType] : m_internalBitDepth[channelType]); } - m_cEncLib.setPCMLog2MaxSize ( m_pcmLog2MaxSize); m_cEncLib.setMaxNumMergeCand ( m_maxNumMergeCand ); m_cEncLib.setMaxNumAffineMergeCand ( m_maxNumAffineMergeCand ); + m_cEncLib.setMaxNumTriangleCand ( m_maxNumTriangleCand ); + m_cEncLib.setMaxNumIBCMergeCand ( m_maxNumIBCMergeCand ); //====== Weighted Prediction ======== m_cEncLib.setUseWP ( m_useWeightedPred ); m_cEncLib.setWPBiPred ( m_useWeightedBiPred ); - //====== Parallel Merge Estimation ======== - m_cEncLib.setLog2ParallelMergeLevelMinus2 ( m_log2ParallelMergeLevel - 2 ); - - //====== Slice ======== - m_cEncLib.setSliceMode ( m_sliceMode ); - m_cEncLib.setSliceArgument ( m_sliceArgument ); - -#if HEVC_DEPENDENT_SLICES - //====== Dependent Slice ======== - m_cEncLib.setSliceSegmentMode ( m_sliceSegmentMode ); - m_cEncLib.setSliceSegmentArgument ( m_sliceSegmentArgument ); -#endif - - if(m_sliceMode == NO_SLICES ) + //====== Tiles and Slices ======== + m_cEncLib.setNoPicPartitionFlag( !m_picPartitionFlag ); + if( m_picPartitionFlag ) { - m_bLFCrossSliceBoundaryFlag = true; + m_cEncLib.setTileColWidths( m_tileColumnWidth ); + m_cEncLib.setTileRowHeights( m_tileRowHeight ); + m_cEncLib.setRectSliceFlag( !m_rasterSliceFlag ); + m_cEncLib.setNumSlicesInPic( m_numSlicesInPic ); + m_cEncLib.setTileIdxDeltaPresentFlag( m_tileIdxDeltaPresentFlag ); + m_cEncLib.setRectSlices( m_rectSlices ); + m_cEncLib.setRasterSliceSizes( m_rasterSliceSize ); + m_cEncLib.setLFCrossTileBoundaryFlag( !m_disableLFCrossTileBoundaryFlag ); + m_cEncLib.setLFCrossSliceBoundaryFlag( !m_disableLFCrossSliceBoundaryFlag ); } - m_cEncLib.setLFCrossSliceBoundaryFlag ( m_bLFCrossSliceBoundaryFlag ); + else + { + m_cEncLib.setRectSliceFlag( true ); + m_cEncLib.setNumSlicesInPic( 1 ); + m_cEncLib.setTileIdxDeltaPresentFlag( 0 ); + m_cEncLib.setLFCrossTileBoundaryFlag( true ); + m_cEncLib.setLFCrossSliceBoundaryFlag( true ); + } + + //====== Sub-picture and Slices ======== + 
m_cEncLib.setSingleSlicePerSubPicFlagFlag ( m_singleSlicePerSubPicFlag ); m_cEncLib.setUseSAO ( m_bUseSAO ); m_cEncLib.setTestSAODisableAtPictureLevel ( m_bTestSAODisableAtPictureLevel ); m_cEncLib.setSaoEncodingRate ( m_saoEncodingRate ); @@ -375,121 +549,157 @@ void EncApp::xInitLibCfg() m_cEncLib.setMaxNumOffsetsPerPic ( m_maxNumOffsetsPerPic); m_cEncLib.setSaoCtuBoundary ( m_saoCtuBoundary); - m_cEncLib.setPCMInputBitDepthFlag ( m_bPCMInputBitDepthFlag); - m_cEncLib.setPCMFilterDisableFlag ( m_bPCMFilterDisableFlag); -#if K0238_SAO_GREEDY_MERGE_ENCODING m_cEncLib.setSaoGreedyMergeEnc ( m_saoGreedyMergeEnc); -#endif m_cEncLib.setIntraSmoothingDisabledFlag (!m_enableIntraReferenceSmoothing ); m_cEncLib.setDecodedPictureHashSEIType ( m_decodedPictureHashSEIType ); +#if HEVC_SEI m_cEncLib.setRecoveryPointSEIEnabled ( m_recoveryPointSEIEnabled ); +#endif + m_cEncLib.setDependentRAPIndicationSEIEnabled ( m_drapPeriod > 0 ); m_cEncLib.setBufferingPeriodSEIEnabled ( m_bufferingPeriodSEIEnabled ); m_cEncLib.setPictureTimingSEIEnabled ( m_pictureTimingSEIEnabled ); - m_cEncLib.setToneMappingInfoSEIEnabled ( m_toneMappingInfoSEIEnabled ); - m_cEncLib.setTMISEIToneMapId ( m_toneMapId ); - m_cEncLib.setTMISEIToneMapCancelFlag ( m_toneMapCancelFlag ); - m_cEncLib.setTMISEIToneMapPersistenceFlag ( m_toneMapPersistenceFlag ); - m_cEncLib.setTMISEICodedDataBitDepth ( m_toneMapCodedDataBitDepth ); - m_cEncLib.setTMISEITargetBitDepth ( m_toneMapTargetBitDepth ); - m_cEncLib.setTMISEIModelID ( m_toneMapModelId ); - m_cEncLib.setTMISEIMinValue ( m_toneMapMinValue ); - m_cEncLib.setTMISEIMaxValue ( m_toneMapMaxValue ); - m_cEncLib.setTMISEISigmoidMidpoint ( m_sigmoidMidpoint ); - m_cEncLib.setTMISEISigmoidWidth ( m_sigmoidWidth ); - m_cEncLib.setTMISEIStartOfCodedInterva ( m_startOfCodedInterval ); - m_cEncLib.setTMISEINumPivots ( m_numPivots ); - m_cEncLib.setTMISEICodedPivotValue ( m_codedPivotValue ); - m_cEncLib.setTMISEITargetPivotValue ( m_targetPivotValue ); - 
m_cEncLib.setTMISEICameraIsoSpeedIdc ( m_cameraIsoSpeedIdc ); - m_cEncLib.setTMISEICameraIsoSpeedValue ( m_cameraIsoSpeedValue ); - m_cEncLib.setTMISEIExposureIndexIdc ( m_exposureIndexIdc ); - m_cEncLib.setTMISEIExposureIndexValue ( m_exposureIndexValue ); - m_cEncLib.setTMISEIExposureCompensationValueSignFlag ( m_exposureCompensationValueSignFlag ); - m_cEncLib.setTMISEIExposureCompensationValueNumerator ( m_exposureCompensationValueNumerator ); - m_cEncLib.setTMISEIExposureCompensationValueDenomIdc ( m_exposureCompensationValueDenomIdc ); - m_cEncLib.setTMISEIRefScreenLuminanceWhite ( m_refScreenLuminanceWhite ); - m_cEncLib.setTMISEIExtendedRangeWhiteLevel ( m_extendedRangeWhiteLevel ); - m_cEncLib.setTMISEINominalBlackLevelLumaCodeValue ( m_nominalBlackLevelLumaCodeValue ); - m_cEncLib.setTMISEINominalWhiteLevelLumaCodeValue ( m_nominalWhiteLevelLumaCodeValue ); - m_cEncLib.setTMISEIExtendedWhiteLevelLumaCodeValue ( m_extendedWhiteLevelLumaCodeValue ); - m_cEncLib.setChromaResamplingFilterHintEnabled ( m_chromaResamplingFilterSEIenabled ); - m_cEncLib.setChromaResamplingHorFilterIdc ( m_chromaResamplingHorFilterIdc ); - m_cEncLib.setChromaResamplingVerFilterIdc ( m_chromaResamplingVerFilterIdc ); + m_cEncLib.setFrameFieldInfoSEIEnabled ( m_frameFieldInfoSEIEnabled ); + m_cEncLib.setBpDeltasGOPStructure ( m_bpDeltasGOPStructure ); + m_cEncLib.setDecodingUnitInfoSEIEnabled ( m_decodingUnitInfoSEIEnabled ); + m_cEncLib.setHrdParametersPresentFlag ( m_hrdParametersPresentFlag ); m_cEncLib.setFramePackingArrangementSEIEnabled ( m_framePackingSEIEnabled ); m_cEncLib.setFramePackingArrangementSEIType ( m_framePackingSEIType ); m_cEncLib.setFramePackingArrangementSEIId ( m_framePackingSEIId ); m_cEncLib.setFramePackingArrangementSEIQuincunx ( m_framePackingSEIQuincunx ); m_cEncLib.setFramePackingArrangementSEIInterpretation ( m_framePackingSEIInterpretation ); - m_cEncLib.setSegmentedRectFramePackingArrangementSEIEnabled ( m_segmentedRectFramePackingSEIEnabled ); - 
m_cEncLib.setSegmentedRectFramePackingArrangementSEICancel ( m_segmentedRectFramePackingSEICancel ); - m_cEncLib.setSegmentedRectFramePackingArrangementSEIType ( m_segmentedRectFramePackingSEIType ); - m_cEncLib.setSegmentedRectFramePackingArrangementSEIPersistence( m_segmentedRectFramePackingSEIPersistence ); - m_cEncLib.setDisplayOrientationSEIAngle ( m_displayOrientationSEIAngle ); - m_cEncLib.setTemporalLevel0IndexSEIEnabled ( m_temporalLevel0IndexSEIEnabled ); - m_cEncLib.setGradualDecodingRefreshInfoEnabled ( m_gradualDecodingRefreshInfoEnabled ); - m_cEncLib.setNoDisplaySEITLayer ( m_noDisplaySEITLayer ); - m_cEncLib.setDecodingUnitInfoSEIEnabled ( m_decodingUnitInfoSEIEnabled ); - m_cEncLib.setSOPDescriptionSEIEnabled ( m_SOPDescriptionSEIEnabled ); - m_cEncLib.setScalableNestingSEIEnabled ( m_scalableNestingSEIEnabled ); - m_cEncLib.setTMCTSSEIEnabled ( m_tmctsSEIEnabled ); + m_cEncLib.setErpSEIEnabled ( m_erpSEIEnabled ); + m_cEncLib.setErpSEICancelFlag ( m_erpSEICancelFlag ); + m_cEncLib.setErpSEIPersistenceFlag ( m_erpSEIPersistenceFlag ); + m_cEncLib.setErpSEIGuardBandFlag ( m_erpSEIGuardBandFlag ); + m_cEncLib.setErpSEIGuardBandType ( m_erpSEIGuardBandType ); + m_cEncLib.setErpSEILeftGuardBandWidth ( m_erpSEILeftGuardBandWidth ); + m_cEncLib.setErpSEIRightGuardBandWidth ( m_erpSEIRightGuardBandWidth ); + m_cEncLib.setSphereRotationSEIEnabled ( m_sphereRotationSEIEnabled ); + m_cEncLib.setSphereRotationSEICancelFlag ( m_sphereRotationSEICancelFlag ); + m_cEncLib.setSphereRotationSEIPersistenceFlag ( m_sphereRotationSEIPersistenceFlag ); + m_cEncLib.setSphereRotationSEIYaw ( m_sphereRotationSEIYaw ); + m_cEncLib.setSphereRotationSEIPitch ( m_sphereRotationSEIPitch ); + m_cEncLib.setSphereRotationSEIRoll ( m_sphereRotationSEIRoll ); + m_cEncLib.setOmniViewportSEIEnabled ( m_omniViewportSEIEnabled ); + m_cEncLib.setOmniViewportSEIId ( m_omniViewportSEIId ); + m_cEncLib.setOmniViewportSEICancelFlag ( m_omniViewportSEICancelFlag ); + 
m_cEncLib.setOmniViewportSEIPersistenceFlag ( m_omniViewportSEIPersistenceFlag ); + m_cEncLib.setOmniViewportSEICntMinus1 ( m_omniViewportSEICntMinus1 ); + m_cEncLib.setOmniViewportSEIAzimuthCentre ( m_omniViewportSEIAzimuthCentre ); + m_cEncLib.setOmniViewportSEIElevationCentre ( m_omniViewportSEIElevationCentre ); + m_cEncLib.setOmniViewportSEITiltCentre ( m_omniViewportSEITiltCentre ); + m_cEncLib.setOmniViewportSEIHorRange ( m_omniViewportSEIHorRange ); + m_cEncLib.setOmniViewportSEIVerRange ( m_omniViewportSEIVerRange ); + m_cEncLib.setRwpSEIEnabled (m_rwpSEIEnabled); + m_cEncLib.setRwpSEIRwpCancelFlag (m_rwpSEIRwpCancelFlag); + m_cEncLib.setRwpSEIRwpPersistenceFlag (m_rwpSEIRwpPersistenceFlag); + m_cEncLib.setRwpSEIConstituentPictureMatchingFlag (m_rwpSEIConstituentPictureMatchingFlag); + m_cEncLib.setRwpSEINumPackedRegions (m_rwpSEINumPackedRegions); + m_cEncLib.setRwpSEIProjPictureWidth (m_rwpSEIProjPictureWidth); + m_cEncLib.setRwpSEIProjPictureHeight (m_rwpSEIProjPictureHeight); + m_cEncLib.setRwpSEIPackedPictureWidth (m_rwpSEIPackedPictureWidth); + m_cEncLib.setRwpSEIPackedPictureHeight (m_rwpSEIPackedPictureHeight); + m_cEncLib.setRwpSEIRwpTransformType (m_rwpSEIRwpTransformType); + m_cEncLib.setRwpSEIRwpGuardBandFlag (m_rwpSEIRwpGuardBandFlag); + m_cEncLib.setRwpSEIProjRegionWidth (m_rwpSEIProjRegionWidth); + m_cEncLib.setRwpSEIProjRegionHeight (m_rwpSEIProjRegionHeight); + m_cEncLib.setRwpSEIRwpSEIProjRegionTop (m_rwpSEIRwpSEIProjRegionTop); + m_cEncLib.setRwpSEIProjRegionLeft (m_rwpSEIProjRegionLeft); + m_cEncLib.setRwpSEIPackedRegionWidth (m_rwpSEIPackedRegionWidth); + m_cEncLib.setRwpSEIPackedRegionHeight (m_rwpSEIPackedRegionHeight); + m_cEncLib.setRwpSEIPackedRegionTop (m_rwpSEIPackedRegionTop); + m_cEncLib.setRwpSEIPackedRegionLeft (m_rwpSEIPackedRegionLeft); + m_cEncLib.setRwpSEIRwpLeftGuardBandWidth (m_rwpSEIRwpLeftGuardBandWidth); + m_cEncLib.setRwpSEIRwpRightGuardBandWidth (m_rwpSEIRwpRightGuardBandWidth); + 
m_cEncLib.setRwpSEIRwpTopGuardBandHeight (m_rwpSEIRwpTopGuardBandHeight); + m_cEncLib.setRwpSEIRwpBottomGuardBandHeight (m_rwpSEIRwpBottomGuardBandHeight); + m_cEncLib.setRwpSEIRwpGuardBandNotUsedForPredFlag (m_rwpSEIRwpGuardBandNotUsedForPredFlag); + m_cEncLib.setRwpSEIRwpGuardBandType (m_rwpSEIRwpGuardBandType); + m_cEncLib.setGcmpSEIEnabled ( m_gcmpSEIEnabled ); + m_cEncLib.setGcmpSEICancelFlag ( m_gcmpSEICancelFlag ); + m_cEncLib.setGcmpSEIPersistenceFlag ( m_gcmpSEIPersistenceFlag ); + m_cEncLib.setGcmpSEIPackingType ( (uint8_t)m_gcmpSEIPackingType ); + m_cEncLib.setGcmpSEIMappingFunctionType ( (uint8_t)m_gcmpSEIMappingFunctionType ); + m_cEncLib.setGcmpSEIFaceIndex ( m_gcmpSEIFaceIndex ); + m_cEncLib.setGcmpSEIFaceRotation ( m_gcmpSEIFaceRotation ); + m_cEncLib.setGcmpSEIFunctionCoeffU ( m_gcmpSEIFunctionCoeffU ); + m_cEncLib.setGcmpSEIFunctionUAffectedByVFlag ( m_gcmpSEIFunctionUAffectedByVFlag ); + m_cEncLib.setGcmpSEIFunctionCoeffV ( m_gcmpSEIFunctionCoeffV ); + m_cEncLib.setGcmpSEIFunctionVAffectedByUFlag ( m_gcmpSEIFunctionVAffectedByUFlag ); + m_cEncLib.setGcmpSEIGuardBandFlag ( m_gcmpSEIGuardBandFlag ); + m_cEncLib.setGcmpSEIGuardBandBoundaryType ( m_gcmpSEIGuardBandBoundaryType ); + m_cEncLib.setGcmpSEIGuardBandSamplesMinus1 ( (uint8_t)m_gcmpSEIGuardBandSamplesMinus1 ); + m_cEncLib.setSubpicureLevelInfoSEIEnabled (m_subpicureLevelInfoSEIEnabled); + m_cEncLib.setSampleAspectRatioInfoSEIEnabled (m_sampleAspectRatioInfoSEIEnabled); + m_cEncLib.setSariCancelFlag (m_sariCancelFlag); + m_cEncLib.setSariPersistenceFlag (m_sariPersistenceFlag); + m_cEncLib.setSariAspectRatioIdc (m_sariAspectRatioIdc); + m_cEncLib.setSariSarWidth (m_sariSarWidth); + m_cEncLib.setSariSarHeight (m_sariSarHeight); m_cEncLib.setMCTSEncConstraint ( m_MCTSEncConstraint); - m_cEncLib.setTimeCodeSEIEnabled ( m_timeCodeSEIEnabled ); - m_cEncLib.setNumberOfTimeSets ( m_timeCodeSEINumTs ); - for(int i = 0; i < m_timeCodeSEINumTs; i++) - { - m_cEncLib.setTimeSet(m_timeSetArray[i], i); - } 
- m_cEncLib.setKneeSEIEnabled ( m_kneeSEIEnabled ); - m_cEncLib.setKneeSEIId ( m_kneeSEIId ); - m_cEncLib.setKneeSEICancelFlag ( m_kneeSEICancelFlag ); - m_cEncLib.setKneeSEIPersistenceFlag ( m_kneeSEIPersistenceFlag ); - m_cEncLib.setKneeSEIInputDrange ( m_kneeSEIInputDrange ); - m_cEncLib.setKneeSEIInputDispLuminance ( m_kneeSEIInputDispLuminance ); - m_cEncLib.setKneeSEIOutputDrange ( m_kneeSEIOutputDrange ); - m_cEncLib.setKneeSEIOutputDispLuminance ( m_kneeSEIOutputDispLuminance ); - m_cEncLib.setKneeSEINumKneePointsMinus1 ( m_kneeSEINumKneePointsMinus1 ); - m_cEncLib.setKneeSEIInputKneePoint ( m_kneeSEIInputKneePoint ); - m_cEncLib.setKneeSEIOutputKneePoint ( m_kneeSEIOutputKneePoint ); - m_cEncLib.setColourRemapInfoSEIFileRoot ( m_colourRemapSEIFileRoot ); m_cEncLib.setMasteringDisplaySEI ( m_masteringDisplay ); #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI m_cEncLib.setSEIAlternativeTransferCharacteristicsSEIEnable ( m_preferredTransferCharacteristics>=0 ); m_cEncLib.setSEIPreferredTransferCharacteristics ( uint8_t(m_preferredTransferCharacteristics) ); #endif - m_cEncLib.setSEIGreenMetadataInfoSEIEnable ( m_greenMetadataType > 0 ); - m_cEncLib.setSEIGreenMetadataType ( uint8_t(m_greenMetadataType) ); - m_cEncLib.setSEIXSDMetricType ( uint8_t(m_xsdMetricType) ); - -#if HEVC_TILES_WPP - m_cEncLib.setTileUniformSpacingFlag ( m_tileUniformSpacingFlag ); - m_cEncLib.setNumColumnsMinus1 ( m_numTileColumnsMinus1 ); - m_cEncLib.setNumRowsMinus1 ( m_numTileRowsMinus1 ); - if(!m_tileUniformSpacingFlag) - { - m_cEncLib.setColumnWidth ( m_tileColumnWidth ); - m_cEncLib.setRowHeight ( m_tileRowHeight ); + // film grain charcteristics + m_cEncLib.setFilmGrainCharactersticsSEIEnabled (m_fgcSEIEnabled); + m_cEncLib.setFilmGrainCharactersticsSEICancelFlag (m_fgcSEICancelFlag); + m_cEncLib.setFilmGrainCharactersticsSEIPersistenceFlag (m_fgcSEIPersistenceFlag); + m_cEncLib.setFilmGrainCharactersticsSEIModelID ((uint8_t)m_fgcSEIModelID); + 
m_cEncLib.setFilmGrainCharactersticsSEISepColourDescPresent (m_fgcSEISepColourDescPresentFlag); + m_cEncLib.setFilmGrainCharactersticsSEIBlendingModeID ((uint8_t)m_fgcSEIBlendingModeID); + m_cEncLib.setFilmGrainCharactersticsSEILog2ScaleFactor ((uint8_t)m_fgcSEILog2ScaleFactor); + for (int i = 0; i < MAX_NUM_COMPONENT; i++) { + m_cEncLib.setFGCSEICompModelPresent (m_fgcSEICompModelPresent[i], i); } -#endif - m_cEncLib.xCheckGSParameters(); -#if HEVC_TILES_WPP - int uiTilesCount = (m_numTileRowsMinus1+1) * (m_numTileColumnsMinus1+1); - if(uiTilesCount == 1) - { - m_bLFCrossTileBoundaryFlag = true; + // content light level + m_cEncLib.setCLLSEIEnabled (m_cllSEIEnabled); + m_cEncLib.setCLLSEIMaxContentLightLevel ((uint16_t)m_cllSEIMaxContentLevel); + m_cEncLib.setCLLSEIMaxPicAvgLightLevel ((uint16_t)m_cllSEIMaxPicAvgLevel); + // ambient viewing enviornment + m_cEncLib.setAmbientViewingEnvironmentSEIEnabled (m_aveSEIEnabled); + m_cEncLib.setAmbientViewingEnvironmentSEIIlluminance (m_aveSEIAmbientIlluminance); + m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightX ((uint16_t)m_aveSEIAmbientLightX); + m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightY ((uint16_t)m_aveSEIAmbientLightY); + // content colour volume SEI + m_cEncLib.setCcvSEIEnabled (m_ccvSEIEnabled); + m_cEncLib.setCcvSEICancelFlag (m_ccvSEICancelFlag); + m_cEncLib.setCcvSEIPersistenceFlag (m_ccvSEIPersistenceFlag); + m_cEncLib.setCcvSEIEnabled (m_ccvSEIEnabled); + m_cEncLib.setCcvSEICancelFlag (m_ccvSEICancelFlag); + m_cEncLib.setCcvSEIPersistenceFlag (m_ccvSEIPersistenceFlag); + m_cEncLib.setCcvSEIPrimariesPresentFlag (m_ccvSEIPrimariesPresentFlag); + m_cEncLib.setCcvSEIMinLuminanceValuePresentFlag (m_ccvSEIMinLuminanceValuePresentFlag); + m_cEncLib.setCcvSEIMaxLuminanceValuePresentFlag (m_ccvSEIMaxLuminanceValuePresentFlag); + m_cEncLib.setCcvSEIAvgLuminanceValuePresentFlag (m_ccvSEIAvgLuminanceValuePresentFlag); + for(int i = 0; i < MAX_NUM_COMPONENT; i++) { + m_cEncLib.setCcvSEIPrimariesX 
(m_ccvSEIPrimariesX[i], i); + m_cEncLib.setCcvSEIPrimariesY (m_ccvSEIPrimariesY[i], i); } - m_cEncLib.setLFCrossTileBoundaryFlag ( m_bLFCrossTileBoundaryFlag ); + m_cEncLib.setCcvSEIMinLuminanceValue (m_ccvSEIMinLuminanceValue); + m_cEncLib.setCcvSEIMaxLuminanceValue (m_ccvSEIMaxLuminanceValue); + m_cEncLib.setCcvSEIAvgLuminanceValue (m_ccvSEIAvgLuminanceValue); m_cEncLib.setEntropyCodingSyncEnabledFlag ( m_entropyCodingSyncEnabledFlag ); -#endif m_cEncLib.setTMVPModeId ( m_TMVPModeId ); -#if HEVC_USE_SCALING_LISTS + m_cEncLib.setSliceLevelRpl ( m_sliceLevelRpl ); + m_cEncLib.setSliceLevelDblk ( m_sliceLevelDblk ); + m_cEncLib.setSliceLevelSao ( m_sliceLevelSao ); + m_cEncLib.setSliceLevelAlf ( m_sliceLevelAlf ); + m_cEncLib.setConstantSliceHeaderParamsEnabledFlag ( m_constantSliceHeaderParamsEnabledFlag ); + m_cEncLib.setPPSDepQuantEnabledIdc ( m_PPSDepQuantEnabledIdc ); + m_cEncLib.setPPSRefPicListSPSIdc0 ( m_PPSRefPicListSPSIdc0 ); + m_cEncLib.setPPSRefPicListSPSIdc1 ( m_PPSRefPicListSPSIdc1 ); + m_cEncLib.setPPSMvdL1ZeroIdc ( m_PPSMvdL1ZeroIdc ); + m_cEncLib.setPPSCollocatedFromL0Idc ( m_PPSCollocatedFromL0Idc ); + m_cEncLib.setPPSSixMinusMaxNumMergeCandPlus1 ( m_PPSSixMinusMaxNumMergeCandPlus1 ); + m_cEncLib.setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 ( m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 ); m_cEncLib.setUseScalingListId ( m_useScalingListId ); m_cEncLib.setScalingListFileName ( m_scalingListFileName ); -#endif + m_cEncLib.setDisableScalingMatrixForLfnstBlks ( m_disableScalingMatrixForLfnstBlks); m_cEncLib.setDepQuantEnabledFlag ( m_depQuantEnabledFlag); -#if HEVC_USE_SIGN_HIDING m_cEncLib.setSignDataHidingEnabledFlag ( m_signDataHidingEnabledFlag); -#endif m_cEncLib.setUseRateCtrl ( m_RCEnableRateControl ); m_cEncLib.setTargetBitrate ( m_RCTargetBitrate ); m_cEncLib.setKeepHierBit ( m_RCKeepHierarchicalBit ); @@ -502,24 +712,17 @@ void EncApp::xInitLibCfg() m_cEncLib.setCpbSize ( m_RCCpbSize ); m_cEncLib.setInitialCpbFullness ( 
m_RCInitialCpbFullness ); #endif - m_cEncLib.setTransquantBypassEnabledFlag ( m_TransquantBypassEnabledFlag ); - m_cEncLib.setCUTransquantBypassFlagForceValue ( m_CUTransquantBypassFlagForce ); m_cEncLib.setCostMode ( m_costMode ); m_cEncLib.setUseRecalculateQPAccordingToLambda ( m_recalculateQPAccordingToLambda ); -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - m_cEncLib.setUseStrongIntraSmoothing ( m_useStrongIntraSmoothing ); -#endif + m_cEncLib.setDecodingParameterSetEnabled ( m_decodingParameterSetEnabled ); +#if HEVC_SEI m_cEncLib.setActiveParameterSetsSEIEnabled ( m_activeParameterSetsSEIEnabled ); +#endif m_cEncLib.setVuiParametersPresentFlag ( m_vuiParametersPresentFlag ); m_cEncLib.setAspectRatioInfoPresentFlag ( m_aspectRatioInfoPresentFlag); m_cEncLib.setAspectRatioIdc ( m_aspectRatioIdc ); m_cEncLib.setSarWidth ( m_sarWidth ); m_cEncLib.setSarHeight ( m_sarHeight ); - m_cEncLib.setOverscanInfoPresentFlag ( m_overscanInfoPresentFlag ); - m_cEncLib.setOverscanAppropriateFlag ( m_overscanAppropriateFlag ); - m_cEncLib.setVideoSignalTypePresentFlag ( m_videoSignalTypePresentFlag ); - m_cEncLib.setVideoFormat ( m_videoFormat ); - m_cEncLib.setVideoFullRangeFlag ( m_videoFullRangeFlag ); m_cEncLib.setColourDescriptionPresentFlag ( m_colourDescriptionPresentFlag ); m_cEncLib.setColourPrimaries ( m_colourPrimaries ); m_cEncLib.setTransferCharacteristics ( m_transferCharacteristics ); @@ -527,21 +730,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setChromaLocInfoPresentFlag ( m_chromaLocInfoPresentFlag ); m_cEncLib.setChromaSampleLocTypeTopField ( m_chromaSampleLocTypeTopField ); m_cEncLib.setChromaSampleLocTypeBottomField ( m_chromaSampleLocTypeBottomField ); - m_cEncLib.setNeutralChromaIndicationFlag ( m_neutralChromaIndicationFlag ); - m_cEncLib.setDefaultDisplayWindow ( m_defDispWinLeftOffset, m_defDispWinRightOffset, m_defDispWinTopOffset, m_defDispWinBottomOffset ); - m_cEncLib.setFrameFieldInfoPresentFlag ( m_frameFieldInfoPresentFlag ); 
- m_cEncLib.setPocProportionalToTimingFlag ( m_pocProportionalToTimingFlag ); - m_cEncLib.setNumTicksPocDiffOneMinus1 ( m_numTicksPocDiffOneMinus1 ); - m_cEncLib.setBitstreamRestrictionFlag ( m_bitstreamRestrictionFlag ); -#if HEVC_TILES_WPP - m_cEncLib.setTilesFixedStructureFlag ( m_tilesFixedStructureFlag ); -#endif - m_cEncLib.setMotionVectorsOverPicBoundariesFlag ( m_motionVectorsOverPicBoundariesFlag ); - m_cEncLib.setMinSpatialSegmentationIdc ( m_minSpatialSegmentationIdc ); - m_cEncLib.setMaxBytesPerPicDenom ( m_maxBytesPerPicDenom ); - m_cEncLib.setMaxBitsPerMinCuDenom ( m_maxBitsPerMinCuDenom ); - m_cEncLib.setLog2MaxMvLengthHorizontal ( m_log2MaxMvLengthHorizontal ); - m_cEncLib.setLog2MaxMvLengthVertical ( m_log2MaxMvLengthVertical ); + m_cEncLib.setChromaSampleLocType ( m_chromaSampleLocType ); + m_cEncLib.setOverscanInfoPresentFlag ( m_overscanInfoPresentFlag ); + m_cEncLib.setOverscanAppropriateFlag ( m_overscanAppropriateFlag ); + m_cEncLib.setVideoFullRangeFlag ( m_videoFullRangeFlag ); m_cEncLib.setEfficientFieldIRAPEnabled ( m_bEfficientFieldIRAPEnabled ); m_cEncLib.setHarmonizeGopFirstFieldCoupleEnabled ( m_bHarmonizeGopFirstFieldCoupleEnabled ); m_cEncLib.setSummaryOutFilename ( m_summaryOutFilename ); @@ -561,29 +753,47 @@ void EncApp::xInitLibCfg() #if ENABLE_SPLIT_PARALLELISM m_cEncLib.setNumSplitThreads ( m_numSplitThreads ); m_cEncLib.setForceSingleSplitThread ( m_forceSplitSequential ); -#endif -#if ENABLE_WPP_PARALLELISM - m_cEncLib.setNumWppThreads ( m_numWppThreads ); - m_cEncLib.setNumWppExtraLines ( m_numWppExtraLines ); - m_cEncLib.setEnsureWppBitEqual ( m_ensureWppBitEqual ); - #endif m_cEncLib.setUseALF ( m_alf ); - m_cEncLib.setReshaper ( m_lumaReshapeEnable ); + m_cEncLib.setLmcs ( m_lmcsEnabled ); m_cEncLib.setReshapeSignalType ( m_reshapeSignalType ); m_cEncLib.setReshapeIntraCMD ( m_intraCMD ); m_cEncLib.setReshapeCW ( m_reshapeCW ); + m_cEncLib.setReshapeCSoffset ( m_CSoffset ); + +#if JVET_O0756_CALCULATE_HDRMETRICS + for 
(int i=0; i<hdrtoolslib::NB_REF_WHITE; i++) + { + m_cEncLib.setWhitePointDeltaE (i, m_whitePointDeltaE[i] ); + } + m_cEncLib.setMaxSampleValue (m_maxSampleValue); + m_cEncLib.setSampleRange (m_sampleRange); + m_cEncLib.setColorPrimaries (m_colorPrimaries); + m_cEncLib.setEnableTFunctionLUT (m_enableTFunctionLUT); + for (int i=0; i<2; i++) + { + m_cEncLib.setChromaLocation (i, m_chromaLocation); + m_cEncLib.setChromaUPFilter (m_chromaUPFilter); + } + m_cEncLib.setCropOffsetLeft (m_cropOffsetLeft); + m_cEncLib.setCropOffsetTop (m_cropOffsetTop); + m_cEncLib.setCropOffsetRight (m_cropOffsetRight); + m_cEncLib.setCropOffsetBottom (m_cropOffsetBottom); + m_cEncLib.setCalculateHdrMetrics (m_calculateHdrMetrics); +#endif + m_cEncLib.setGopBasedTemporalFilterEnabled(m_gopBasedTemporalFilterEnabled); + m_cEncLib.setNumRefLayers ( m_numRefLayers ); } -void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList - ) +void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId ) { // Video I/O m_cVideoIOYuvInputFile.open( m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth ); // read mode #if EXTENSION_360_VIDEO m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC); #else - m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_iSourceWidth - m_aiPad[0], m_iSourceHeight - m_aiPad[1], m_InputChromaFormatIDC); + const int sourceHeight = m_isField ? 
m_iSourceHeightOrg : m_iSourceHeight; + m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC); #endif if (!m_reconFileName.empty()) { @@ -598,11 +808,24 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList EXIT ("Invalid chroma output bit-depth or image width for packed YUV output, aborting\n"); } - m_cVideoIOYuvReconFile.open(m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth); // write mode + std::string reconFileName = m_reconFileName; + if( m_reconFileName.compare( "/dev/null" ) && (m_maxLayers > 1) ) + { + size_t pos = reconFileName.find_last_of('.'); + if (pos != string::npos) + { + reconFileName.insert( pos, std::to_string( layerId ) ); + } + else + { + reconFileName.append( std::to_string( layerId ) ); + } + } + m_cVideoIOYuvReconFile.open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth ); // write mode } // create the encoder - m_cEncLib.create(); + m_cEncLib.create( layerId ); // create the output buffer for( int i = 0; i < (m_iGOPSize + 1 + (m_isField ? 1 : 0)); i++ ) @@ -630,128 +853,172 @@ void EncApp::xInitLib(bool isFieldCoding) // Public member functions // ==================================================================================================================== -/** - - create internal class - - initialize internal variable - - until the end of input YUV file, call encoding function in EncLib class - - delete allocated buffers - - destroy internal class - . - */ -void EncApp::encode() +void EncApp::createLib( const int layerId ) { - m_bitstream.open(m_bitstreamFileName.c_str(), fstream::binary | fstream::out); - if (!m_bitstream) + const int sourceHeight = m_isField ? 
m_iSourceHeightOrg : m_iSourceHeight; + UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_iSourceWidth, sourceHeight ) ); + + m_orgPic = new PelStorage; + m_trueOrgPic = new PelStorage; + m_orgPic->create( unitArea ); + m_trueOrgPic->create( unitArea ); + + if( !m_bitstream.is_open() ) { - EXIT( "Failed to open bitstream file " << m_bitstreamFileName.c_str() << " for writing\n"); + m_bitstream.open( m_bitstreamFileName.c_str(), fstream::binary | fstream::out ); + if( !m_bitstream ) + { + EXIT( "Failed to open bitstream file " << m_bitstreamFileName.c_str() << " for writing\n" ); + } } - std::list<PelUnitBuf*> recBufList; // initialize internal class & member variables xInitLibCfg(); - xCreateLib( recBufList - ); - xInitLib(m_isField); + xCreateLib( m_recBufList, layerId ); + xInitLib( m_isField ); printChromaFormat(); - // main encoder loop - int iNumEncoded = 0; - bool bEos = false; +#if EXTENSION_360_VIDEO + m_ext360 = new TExt360AppEncTop( *this, m_cEncLib.getGOPEncoder()->getExt360Data(), *( m_cEncLib.getGOPEncoder() ), *m_orgPic ); +#endif - const InputColourSpaceConversion ipCSC = m_inputColourSpaceConvert; - const InputColourSpaceConversion snrCSC = (!m_snrInternalColourSpace) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED; + if( m_gopBasedTemporalFilterEnabled ) + { + m_temporalFilter.init( m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_iSourceWidth, m_iSourceHeight, + m_aiPad, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC, + m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths, + m_gopBasedTemporalFilterFutureReference ); + } +} - PelStorage trueOrgPic; - PelStorage orgPic; - const int sourceHeight = m_isField ? 
m_iSourceHeightOrg : m_iSourceHeight; - UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_iSourceWidth, sourceHeight ) ); +void EncApp::destroyLib() +{ + printf( "\nLayerId %2d", m_cEncLib.getLayerId() ); + + m_cEncLib.printSummary( m_isField ); - orgPic.create( unitArea ); - trueOrgPic.create( unitArea ); + // delete used buffers in encoder class + m_cEncLib.deletePicBuffer(); + + for( auto &p : m_recBufList ) + { + delete p; + } + m_recBufList.clear(); + + xDestroyLib(); + + if( m_bitstream.is_open() ) + { + m_bitstream.close(); + } + + m_orgPic->destroy(); + m_trueOrgPic->destroy(); + delete m_trueOrgPic; + delete m_orgPic; #if EXTENSION_360_VIDEO - TExt360AppEncTop ext360(*this, m_cEncLib.getGOPEncoder()->getExt360Data(), *(m_cEncLib.getGOPEncoder()), orgPic); + delete m_ext360; #endif - while ( !bEos ) - { - // read input YUV file + printRateSummary(); +} + +bool EncApp::encodePrep( bool& eos ) +{ + // main encoder loop + const InputColourSpaceConversion ipCSC = m_inputColourSpaceConvert; + const InputColourSpaceConversion snrCSC = ( !m_snrInternalColourSpace ) ? 
m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED; + + // read input YUV file #if EXTENSION_360_VIDEO - if (ext360.isEnabled()) - { - ext360.read(m_cVideoIOYuvInputFile, orgPic, trueOrgPic, ipCSC); - } - else - { - m_cVideoIOYuvInputFile.read(orgPic, trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range); - } + if( m_ext360->isEnabled() ) + { + m_ext360->read( m_cVideoIOYuvInputFile, *m_orgPic, *m_trueOrgPic, ipCSC ); + } + else + { + m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range ); + } #else - m_cVideoIOYuvInputFile.read( orgPic, trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range ); + m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range ); #endif - // increase number of received frames - m_iFrameRcvd++; + if( m_gopBasedTemporalFilterEnabled ) + { + m_temporalFilter.filter( m_orgPic, m_iFrameRcvd ); + } + + // increase number of received frames + m_iFrameRcvd++; - bEos = (m_isField && (m_iFrameRcvd == (m_framesToBeEncoded >> 1) )) || ( !m_isField && (m_iFrameRcvd == m_framesToBeEncoded) ); + eos = ( m_isField && ( m_iFrameRcvd == ( m_framesToBeEncoded >> 1 ) ) ) || ( !m_isField && ( m_iFrameRcvd == m_framesToBeEncoded ) ); - bool flush = 0; - // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures - if (m_cVideoIOYuvInputFile.isEof()) - { - flush = true; - bEos = true; - m_iFrameRcvd--; - m_cEncLib.setFramesToBeEncoded(m_iFrameRcvd); - } + // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures + if( m_cVideoIOYuvInputFile.isEof() ) + { + m_flush = true; + eos = true; + m_iFrameRcvd--; + m_cEncLib.setFramesToBeEncoded( m_iFrameRcvd ); + } - // call encoding function for one frame - if ( m_isField ) - { - m_cEncLib.encode( bEos, flush ? 0 : &orgPic, flush ? 
0 : &trueOrgPic, snrCSC, recBufList, - iNumEncoded, m_isTopFieldFirst ); - } - else - { - m_cEncLib.encode( bEos, flush ? 0 : &orgPic, flush ? 0 : &trueOrgPic, snrCSC, recBufList, - iNumEncoded ); - } + bool keepDoing = false; + + // call encoding function for one frame + if( m_isField ) + { + keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, snrCSC, m_recBufList, m_numEncoded, m_isTopFieldFirst ); + } + else + { + keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, snrCSC, m_recBufList, m_numEncoded ); + } + + return keepDoing; +} + +bool EncApp::encode() +{ + const InputColourSpaceConversion snrCSC = ( !m_snrInternalColourSpace ) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED; + bool keepDoing = false; + + // call encoding function for one frame + if( m_isField ) + { + keepDoing = m_cEncLib.encode( snrCSC, m_recBufList, m_numEncoded, m_isTopFieldFirst ); + } + else + { + keepDoing = m_cEncLib.encode( snrCSC, m_recBufList, m_numEncoded ); + } +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = m_cEncLib.getMetricTime(); +#endif + + // output when the entire GOP was proccessed + if( !keepDoing ) + { // write bistream to file if necessary - if ( iNumEncoded > 0 ) + if( m_numEncoded > 0 ) { - xWriteOutput( iNumEncoded, recBufList - ); + xWriteOutput( m_numEncoded, m_recBufList ); } // temporally skip frames if( m_temporalSubsampleRatio > 1 ) { #if EXTENSION_360_VIDEO - m_cVideoIOYuvInputFile.skipFrames(m_temporalSubsampleRatio - 1, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC); + m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC ); #else - m_cVideoIOYuvInputFile.skipFrames(m_temporalSubsampleRatio-1, m_iSourceWidth - m_aiPad[0], m_iSourceHeight - m_aiPad[1], m_InputChromaFormatIDC); + const int sourceHeight = m_isField ? 
m_iSourceHeightOrg : m_iSourceHeight; + m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC ); #endif } } - m_cEncLib.printSummary(m_isField); - - - // delete used buffers in encoder class - m_cEncLib.deletePicBuffer(); - - for( auto &p : recBufList ) - { - delete p; - } - recBufList.clear(); - - xDestroyLib(); - - m_bitstream.close(); - - printRateSummary(); - - return; + return keepDoing; } // ==================================================================================================================== @@ -764,8 +1031,7 @@ void EncApp::encode() \param iNumEncoded number of encoded frames \param accessUnits list of access units to be written */ -void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList - ) +void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList ) { const InputColourSpaceConversion ipCSC = (!m_outputInternalColourSpace) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED; std::list<PelUnitBuf*>::iterator iterPicYuvRec = recBufList.end(); @@ -800,10 +1066,18 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList const PelUnitBuf* pcPicYuvRec = *(iterPicYuvRec++); if (!m_reconFileName.empty()) { - m_cVideoIOYuvReconFile.write( *pcPicYuvRec, - ipCSC, - m_packedYUVMode, - m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + if( m_cEncLib.isRPREnabled() && m_cEncLib.getUpscaledOutput() ) + { + const SPS& sps = *m_cEncLib.getSPS( 0 ); + const PPS& pps = *m_cEncLib.getPPS( ( sps.getMaxPicWidthInLumaSamples() != pcPicYuvRec->get( COMPONENT_Y ).width || sps.getMaxPicHeightInLumaSamples() != pcPicYuvRec->get( COMPONENT_Y ).height ) ? 
ENC_PPS_ID_RPR : 0 ); + + m_cVideoIOYuvReconFile.writeUpscaledPicture( sps, pps, *pcPicYuvRec, ipCSC, m_packedYUVMode, m_cEncLib.getUpscaledOutput(), NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } + else + { + m_cVideoIOYuvReconFile.write( pcPicYuvRec->get( COMPONENT_Y ).width, pcPicYuvRec->get( COMPONENT_Y ).height, *pcPicYuvRec, ipCSC, m_packedYUVMode, + m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + } } } } @@ -830,38 +1104,21 @@ void EncApp::rateStatsAccum(const AccessUnit& au, const std::vector<uint32_t>& a { switch ((*it_au)->m_nalUnitType) { -#if !JVET_M0101_HLS - case NAL_UNIT_CODED_SLICE_TRAIL_R: - case NAL_UNIT_CODED_SLICE_TRAIL_N: - case NAL_UNIT_CODED_SLICE_TSA_R: - case NAL_UNIT_CODED_SLICE_TSA_N: - case NAL_UNIT_CODED_SLICE_STSA_R: - case NAL_UNIT_CODED_SLICE_STSA_N: - case NAL_UNIT_CODED_SLICE_BLA_W_LP: - case NAL_UNIT_CODED_SLICE_BLA_W_RADL: - case NAL_UNIT_CODED_SLICE_BLA_N_LP: - case NAL_UNIT_CODED_SLICE_IDR_W_RADL: - case NAL_UNIT_CODED_SLICE_IDR_N_LP: - case NAL_UNIT_CODED_SLICE_CRA: - case NAL_UNIT_CODED_SLICE_RADL_N: - case NAL_UNIT_CODED_SLICE_RADL_R: - case NAL_UNIT_CODED_SLICE_RASL_N: - case NAL_UNIT_CODED_SLICE_RASL_R: -#else case NAL_UNIT_CODED_SLICE_TRAIL: case NAL_UNIT_CODED_SLICE_STSA: case NAL_UNIT_CODED_SLICE_IDR_W_RADL: case NAL_UNIT_CODED_SLICE_IDR_N_LP: case NAL_UNIT_CODED_SLICE_CRA: + case NAL_UNIT_CODED_SLICE_GDR: case NAL_UNIT_CODED_SLICE_RADL: case NAL_UNIT_CODED_SLICE_RASL: -#endif -#if HEVC_VPS + case NAL_UNIT_DPS: case NAL_UNIT_VPS: -#endif case NAL_UNIT_SPS: case NAL_UNIT_PPS: - case NAL_UNIT_APS: + case NAL_UNIT_PH: + case NAL_UNIT_PREFIX_APS: + case NAL_UNIT_SUFFIX_APS: m_essentialBytes += *it_stats; break; default: diff --git a/source/App/EncoderApp/EncApp.h b/source/App/EncoderApp/EncApp.h index 146e9dc4a30b209fdf0baf2f33d8aeea21a14177..c3d384d12326e31526ede5eabdf8e2eafb6a61f8 100644 --- a/source/App/EncoderApp/EncApp.h +++ 
b/source/App/EncoderApp/EncApp.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,6 +45,16 @@ #include "Utilities/VideoIOYuv.h" #include "CommonLib/NAL.h" #include "EncAppCfg.h" +#if EXTENSION_360_VIDEO +#include "AppEncHelper360/TExt360AppEncTop.h" +#endif +#include "EncoderLib/EncTemporalFilter.h" + +#if JVET_O0756_CALCULATE_HDRMETRICS +#include <chrono> +#endif + +class EncAppCommon; //! \ingroup EncoderApp //! \{ @@ -62,14 +72,16 @@ private: VideoIOYuv m_cVideoIOYuvInputFile; ///< input YUV file VideoIOYuv m_cVideoIOYuvReconFile; ///< output reconstruction file int m_iFrameRcvd; ///< number of received frames - uint32_t m_essentialBytes; - uint32_t m_totalBytes; - fstream m_bitstream; + uint32_t m_essentialBytes; + uint32_t m_totalBytes; + fstream& m_bitstream; +#if JVET_O0756_CALCULATE_HDRMETRICS + std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime; +#endif private: // initialization - void xCreateLib ( std::list<PelUnitBuf*>& recBufList - ); ///< create files & encoder class + void xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId ); ///< create files & encoder class void xInitLibCfg (); ///< initialize internal variables void xInitLib (bool isFieldCoding); ///< initialize encoder class void xDestroyLib (); ///< destroy encoder class @@ -81,14 +93,33 @@ private: void printRateSummary (); void printChromaFormat(); + std::list<PelUnitBuf*> m_recBufList; + int m_numEncoded; + PelStorage* m_trueOrgPic; + PelStorage* m_orgPic; +#if EXTENSION_360_VIDEO + TExt360AppEncTop* m_ext360; +#endif + EncTemporalFilter m_temporalFilter; + bool m_flush; + public: - EncApp(); + EncApp( fstream& bitStream, EncLibCommon* encLibCommon ); virtual ~EncApp(); - void encode(); ///< main encoding 
function + int getMaxLayers() const { return m_maxLayers; } + void createLib( int layerId ); + void destroyLib(); + bool encodePrep( bool& eos ); + bool encode(); ///< main encoding function void outputAU( const AccessUnit& au ); +#if JVET_O0756_CALCULATE_HDRMETRICS + std::chrono::duration<long long, ratio<1, 1000000000>> getMetricTime() const { return m_metricTime; }; +#endif + + };// END CLASS DEFINITION EncApp //! \} diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index c1c6391d2a9e00f7fbb0ebd2e88d2eeed5dd164a..cdad7cff4d66dfd3bc7e3c999f4ffeb9b2a050e5 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,39 +60,10 @@ namespace po = df::program_options_lite; enum ExtendedProfileName // this is used for determining profile strings, where multiple profiles map to a single profile idc with various constraint flag combinations { - NONE = 0, - MAIN = 1, - MAIN10 = 2, - MAINSTILLPICTURE = 3, - MAINREXT = 4, - HIGHTHROUGHPUTREXT = 5, // Placeholder profile for development - // The following are RExt profiles, which would map to the MAINREXT profile idc. - // The enumeration indicates the bit-depth constraint in the bottom 2 digits - // the chroma format in the next digit - // the intra constraint in the next digit - // If it is a RExt still picture, there is a '1' for the top digit. 
- MONOCHROME_8 = 1008, - MONOCHROME_12 = 1012, - MONOCHROME_16 = 1016, - MAIN_12 = 1112, - MAIN_422_10 = 1210, - MAIN_422_12 = 1212, - MAIN_444 = 1308, - MAIN_444_10 = 1310, - MAIN_444_12 = 1312, - MAIN_444_16 = 1316, // non-standard profile definition, used for development purposes - MAIN_INTRA = 2108, - MAIN_10_INTRA = 2110, - MAIN_12_INTRA = 2112, - MAIN_422_10_INTRA = 2210, - MAIN_422_12_INTRA = 2212, - MAIN_444_INTRA = 2308, - MAIN_444_10_INTRA = 2310, - MAIN_444_12_INTRA = 2312, - MAIN_444_16_INTRA = 2316, - MAIN_444_STILL_PICTURE = 11308, - MAIN_444_16_STILL_PICTURE = 12316, - NEXT = 6 + NONE = Profile::NONE, + MAIN_10 = Profile::MAIN_10, + MAIN_444_10 = Profile::MAIN_444_10, + AUTO = -1 }; @@ -113,33 +84,50 @@ EncAppCfg::EncAppCfg() , m_maxChromaFormatConstraintIdc(CHROMA_420) , m_bFrameConstraintFlag(false) , m_bNoQtbttDualTreeIntraConstraintFlag(false) +, m_noPartitionConstraintsOverrideConstraintFlag(false) , m_bNoSaoConstraintFlag(false) , m_bNoAlfConstraintFlag(false) -, m_bNoPcmConstraintFlag(false) , m_bNoRefWraparoundConstraintFlag(false) , m_bNoTemporalMvpConstraintFlag(false) , m_bNoSbtmvpConstraintFlag(false) , m_bNoAmvrConstraintFlag(false) , m_bNoBdofConstraintFlag(false) +, m_noDmvrConstraintFlag(false) , m_bNoCclmConstraintFlag(false) , m_bNoMtsConstraintFlag(false) +, m_noSbtConstraintFlag(false) , m_bNoAffineMotionConstraintFlag(false) -, m_bNoGbiConstraintFlag(false) -, m_bNoMhIntraConstraintFlag(false) +, m_bNoBcwConstraintFlag(false) +, m_noIbcConstraintFlag(false) +, m_bNoCiipConstraintFlag(false) +, m_noFPelMmvdConstraintFlag(false) , m_bNoTriangleConstraintFlag(false) , m_bNoLadfConstraintFlag(false) -, m_bNoCurrPicRefConstraintFlag(false) +, m_noTransformSkipConstraintFlag(false) +, m_noBDPCMConstraintFlag(false) +, m_noJointCbCrConstraintFlag(false) , m_bNoQpDeltaConstraintFlag(false) , m_bNoDepQuantConstraintFlag(false) , m_bNoSignDataHidingConstraintFlag(false) +, m_noTrailConstraintFlag(false) +, m_noStsaConstraintFlag(false) +, 
m_noRaslConstraintFlag(false) +, m_noRadlConstraintFlag(false) +, m_noIdrConstraintFlag(false) +, m_noCraConstraintFlag(false) +, m_noGdrConstraintFlag(false) +, m_noApsConstraintFlag(false) + #if EXTENSION_360_VIDEO , m_ext360(*this) #endif { m_aidQP = NULL; +#if HEVC_SEI m_startOfCodedInterval = NULL; m_codedPivotValue = NULL; m_targetPivotValue = NULL; +#endif } EncAppCfg::~EncAppCfg() @@ -148,6 +136,7 @@ EncAppCfg::~EncAppCfg() { delete[] m_aidQP; } +#if HEVC_SEI if ( m_startOfCodedInterval ) { delete[] m_startOfCodedInterval; @@ -163,6 +152,7 @@ EncAppCfg::~EncAppCfg() delete[] m_targetPivotValue; m_targetPivotValue = NULL; } +#endif #if ENABLE_TRACING tracing_uninit(g_trace_ctx); @@ -194,29 +184,24 @@ std::istringstream &operator>>(std::istringstream &in, GOPEntry &entry) //in in>>entry.m_tcOffsetDiv2; in>>entry.m_betaOffsetDiv2; in>>entry.m_temporalId; - in>>entry.m_numRefPicsActive; - in>>entry.m_numRefPics; - for ( int i = 0; i < entry.m_numRefPics; i++ ) - { - in>>entry.m_referencePics[i]; - } - in>>entry.m_interRPSPrediction; - if (entry.m_interRPSPrediction==1) + in >> entry.m_numRefPicsActive0; + in >> entry.m_numRefPics0; + for (int i = 0; i < entry.m_numRefPics0; i++) { - in>>entry.m_deltaRPS; - in>>entry.m_numRefIdc; - for ( int i = 0; i < entry.m_numRefIdc; i++ ) - { - in>>entry.m_refIdc[i]; - } + in >> entry.m_deltaRefPics0[i]; } - else if (entry.m_interRPSPrediction==2) + in >> entry.m_numRefPicsActive1; + in >> entry.m_numRefPics1; + for (int i = 0; i < entry.m_numRefPics1; i++) { - in>>entry.m_deltaRPS; + in >> entry.m_deltaRefPics1[i]; } + return in; } + + bool confirmPara(bool bflag, const char* message); static inline ChromaFormat numberToChromaFormat(const int val) @@ -239,12 +224,8 @@ static const struct MapStrToProfile strToProfile[] = { {"none", Profile::NONE }, - {"main", Profile::MAIN }, - {"main10", Profile::MAIN10 }, - {"main-still-picture", Profile::MAINSTILLPICTURE }, - {"main-RExt", Profile::MAINREXT }, - {"high-throughput-RExt", 
Profile::HIGHTHROUGHPUTREXT }, - {"next", Profile::NEXT } + {"main_10", Profile::MAIN_10 }, + {"main_444_10", Profile::MAIN_444_10 } }; static const struct MapStrToExtendedProfile @@ -255,57 +236,11 @@ static const struct MapStrToExtendedProfile strToExtendedProfile[] = { {"none", NONE }, - {"main", MAIN }, - {"main10", MAIN10 }, - {"main_still_picture", MAINSTILLPICTURE }, - {"main-still-picture", MAINSTILLPICTURE }, - {"main_RExt", MAINREXT }, - {"main-RExt", MAINREXT }, - {"main_rext", MAINREXT }, - {"main-rext", MAINREXT }, - {"high_throughput_RExt", HIGHTHROUGHPUTREXT }, - {"high-throughput-RExt", HIGHTHROUGHPUTREXT }, - {"high_throughput_rext", HIGHTHROUGHPUTREXT }, - {"high-throughput-rext", HIGHTHROUGHPUTREXT }, - {"monochrome", MONOCHROME_8 }, - {"monochrome12", MONOCHROME_12 }, - {"monochrome16", MONOCHROME_16 }, - {"main12", MAIN_12 }, - {"main_422_10", MAIN_422_10 }, - {"main_422_12", MAIN_422_12 }, - {"main_444", MAIN_444 }, + {"main_10", MAIN_10 }, {"main_444_10", MAIN_444_10 }, - {"main_444_12", MAIN_444_12 }, - {"main_444_16", MAIN_444_16 }, - {"main_intra", MAIN_INTRA }, - {"main_10_intra", MAIN_10_INTRA }, - {"main_12_intra", MAIN_12_INTRA }, - {"main_422_10_intra", MAIN_422_10_INTRA}, - {"main_422_12_intra", MAIN_422_12_INTRA}, - {"main_444_intra", MAIN_444_INTRA }, - {"main_444_still_picture", MAIN_444_STILL_PICTURE }, - {"main_444_10_intra", MAIN_444_10_INTRA}, - {"main_444_12_intra", MAIN_444_12_INTRA}, - {"main_444_16_intra", MAIN_444_16_INTRA}, - {"main_444_16_still_picture", MAIN_444_16_STILL_PICTURE }, - {"next", NEXT } + {"auto", AUTO } }; -static const ExtendedProfileName validRExtProfileNames[2/* intraConstraintFlag*/][4/* bit depth constraint 8=0, 10=1, 12=2, 16=3*/][4/*chroma format*/]= -{ - { - { MONOCHROME_8, NONE, NONE, MAIN_444 }, // 8-bit inter for 400, 420, 422 and 444 - { NONE, NONE, MAIN_422_10, MAIN_444_10 }, // 10-bit inter for 400, 420, 422 and 444 - { MONOCHROME_12, MAIN_12, MAIN_422_12, MAIN_444_12 }, // 12-bit inter for 
400, 420, 422 and 444 - { MONOCHROME_16, NONE, NONE, MAIN_444_16 } // 16-bit inter for 400, 420, 422 and 444 (the latter is non standard used for development) - }, - { - { NONE, MAIN_INTRA, NONE, MAIN_444_INTRA }, // 8-bit intra for 400, 420, 422 and 444 - { NONE, MAIN_10_INTRA, MAIN_422_10_INTRA, MAIN_444_10_INTRA }, // 10-bit intra for 400, 420, 422 and 444 - { NONE, MAIN_12_INTRA, MAIN_422_12_INTRA, MAIN_444_12_INTRA }, // 12-bit intra for 400, 420, 422 and 444 - { NONE, NONE, NONE, MAIN_444_16_INTRA } // 16-bit intra for 400, 420, 422 and 444 - } -}; static const struct MapStrToTier { @@ -364,7 +299,6 @@ strToCostMode[] = {"mixed_lossless_lossy", COST_MIXED_LOSSLESS_LOSSY_CODING} }; -#if HEVC_USE_SCALING_LISTS static const struct MapStrToScalingListMode { const char* str; @@ -379,7 +313,6 @@ strToScalingListMode[] = {"default", SCALING_LIST_DEFAULT}, {"file", SCALING_LIST_FILE_READ} }; -#endif template<typename T, typename P> static std::string enumToString(P map[], uint32_t mapLen, const T val) @@ -439,12 +372,10 @@ static inline istream& operator >> (istream &in, CostMode &mode) return readStrToEnum(strToCostMode, sizeof(strToCostMode)/sizeof(*strToCostMode), in, mode); } -#if HEVC_USE_SCALING_LISTS static inline istream& operator >> (istream &in, ScalingListMode &mode) { return readStrToEnum(strToScalingListMode, sizeof(strToScalingListMode)/sizeof(*strToScalingListMode), in, mode); } -#endif template <class T> struct SMultiValueInput @@ -484,6 +415,16 @@ uint32_t SMultiValueInput<uint32_t>::readValue(const char *&pStr, bool &bSuccess return val; } +template<> +uint8_t SMultiValueInput<uint8_t>::readValue(const char *&pStr, bool &bSuccess) +{ + char *eptr; + uint32_t val = strtoul(pStr, &eptr, 0); + pStr = eptr; + bSuccess = !(*eptr != 0 && !isspace(*eptr) && *eptr != ',') && !(val<minValIncl || val>maxValIncl); + return val; +} + template<> int SMultiValueInput<int>::readValue(const char *&pStr, bool &bSuccess) { @@ -579,81 +520,109 @@ static inline 
istream& operator >> (std::istream &in, EncAppCfg::OptionalValue<T } #endif -static void -automaticallySelectRExtProfile(const bool bUsingGeneralRExtTools, - const bool bUsingChromaQPAdjustment, - const bool bUsingExtendedPrecision, - const bool bIntraConstraintFlag, - uint32_t &bitDepthConstraint, - ChromaFormat &chromaFormatConstraint, - const int maxBitDepth, - const ChromaFormat chromaFormat) +template <class T1, class T2> +static inline istream& operator >> (std::istream& in, std::map<T1, T2>& map) { - // Try to choose profile, according to table in Q1013. - uint32_t trialBitDepthConstraint=maxBitDepth; - if (trialBitDepthConstraint<8) - { - trialBitDepthConstraint=8; - } - else if (trialBitDepthConstraint==9 || trialBitDepthConstraint==11) + T1 key; + T2 value; + try { - trialBitDepthConstraint++; + in >> key; + in >> value; } - else if (trialBitDepthConstraint>12) + catch (...) { - trialBitDepthConstraint=16; + in.setstate(ios::failbit); } - // both format and bit depth constraints are unspecified - if (bUsingExtendedPrecision || trialBitDepthConstraint==16) - { - bitDepthConstraint = 16; - chromaFormatConstraint = (!bIntraConstraintFlag && chromaFormat==CHROMA_400) ? 
CHROMA_400 : CHROMA_444; - } - else if (bUsingGeneralRExtTools) - { - if (chromaFormat == CHROMA_400 && !bIntraConstraintFlag) - { - bitDepthConstraint = 16; - chromaFormatConstraint = CHROMA_400; - } - else - { - bitDepthConstraint = trialBitDepthConstraint; - chromaFormatConstraint = CHROMA_444; - } + map[key] = value; + return in; +} + + + +static uint32_t getMaxTileColsByLevel( Level::Name level ) +{ + switch( level ) + { + case Level::LEVEL1: + case Level::LEVEL2: + case Level::LEVEL2_1: + return 1; + case Level::LEVEL3: + return 2; + case Level::LEVEL3_1: + return 3; + case Level::LEVEL4: + case Level::LEVEL4_1: + return 5; + case Level::LEVEL5: + case Level::LEVEL5_1: + case Level::LEVEL5_2: + return 10; + case Level::LEVEL6: + case Level::LEVEL6_1: + case Level::LEVEL6_2: + default: + return 20; } - else if (chromaFormat == CHROMA_400) - { - if (bIntraConstraintFlag) - { - chromaFormatConstraint = CHROMA_420; // there is no intra 4:0:0 profile. - bitDepthConstraint = trialBitDepthConstraint; - } - else - { - chromaFormatConstraint = CHROMA_400; - bitDepthConstraint = trialBitDepthConstraint == 8 ? 8 : 12; - } +} + +static uint32_t getMaxTileRowsByLevel( Level::Name level ) +{ + switch( level ) + { + case Level::LEVEL1: + case Level::LEVEL2: + case Level::LEVEL2_1: + return 1; + case Level::LEVEL3: + return 2; + case Level::LEVEL3_1: + return 3; + case Level::LEVEL4: + case Level::LEVEL4_1: + return 5; + case Level::LEVEL5: + case Level::LEVEL5_1: + case Level::LEVEL5_2: + return 11; + case Level::LEVEL6: + case Level::LEVEL6_1: + case Level::LEVEL6_2: + default: + return 21; } - else - { - bitDepthConstraint = trialBitDepthConstraint; - chromaFormatConstraint = chromaFormat; - if (bUsingChromaQPAdjustment && chromaFormat == CHROMA_420) - { - chromaFormatConstraint = CHROMA_422; // 4:2:0 cannot use the chroma qp tool. - } - if (chromaFormatConstraint == CHROMA_422 && bitDepthConstraint == 8) - { - bitDepthConstraint = 10; // there is no 8-bit 4:2:2 profile. 
- } - if (chromaFormatConstraint == CHROMA_420 && !bIntraConstraintFlag) - { - bitDepthConstraint = 12; // there is no 8 or 10-bit 4:2:0 inter RExt profile. - } +} + +static uint32_t getMaxSlicesByLevel( Level::Name level ) +{ + switch( level ) + { + case Level::LEVEL1: + case Level::LEVEL2: + return 16; + case Level::LEVEL2_1: + return 20; + case Level::LEVEL3: + return 30; + case Level::LEVEL3_1: + return 40; + case Level::LEVEL4: + case Level::LEVEL4_1: + return 75; + case Level::LEVEL5: + case Level::LEVEL5_1: + case Level::LEVEL5_2: + return 200; + case Level::LEVEL6: + case Level::LEVEL6_1: + case Level::LEVEL6_2: + default: + return 600; } } + // ==================================================================================================================== // Public member functions // ==================================================================================================================== @@ -672,10 +641,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) int tmpWeightedPredictionMethod; int tmpFastInterSearchMode; int tmpMotionEstimationSearchMethod; - int tmpSliceMode; -#if HEVC_DEPENDENT_SLICES - int tmpSliceSegmentMode; -#endif int tmpDecodedPictureHashSEIMappedType; string inputColourSpaceConvert; string inputPathPrefix; @@ -683,12 +648,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) int saoOffsetBitShift[MAX_NUM_CHANNEL_TYPE]; // Multi-value input fields: // minval, maxval (incl), min_entries, max_entries (incl) [, default values, number of default values] - SMultiValueInput<uint32_t> cfg_ColumnWidth (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max()); - SMultiValueInput<uint32_t> cfg_RowHeight (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max()); + SMultiValueInput<uint32_t> cfgTileColumnWidth (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max()); + SMultiValueInput<uint32_t> cfgTileRowHeight (0, std::numeric_limits<uint32_t>::max(), 
0, std::numeric_limits<uint32_t>::max()); + SMultiValueInput<uint32_t> cfgRectSlicePos (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max()); + SMultiValueInput<uint32_t> cfgRasterSliceSize (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max()); SMultiValueInput<int> cfg_startOfCodedInterval (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16); SMultiValueInput<int> cfg_codedPivotValue (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16); SMultiValueInput<int> cfg_targetPivotValue (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16); + SMultiValueInput<double> cfg_adIntraLambdaModifier (0, std::numeric_limits<double>::max(), 0, MAX_TLAYER); ///< Lambda modifier for Intra pictures, one for each temporal layer. If size>temporalLayer, then use [temporalLayer], else if size>0, use [size()-1], else use m_adLambdaModifier. #if SHARP_LUMA_DELTA_QP @@ -698,7 +666,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) SMultiValueInput<int> cfg_lumaLeveltoDQPMappingLuma (0, std::numeric_limits<int>::max(), 0, LUMA_LEVEL_TO_DQP_LUT_MAXSIZE, defaultLumaLevelTodQp_LumaChangePoints, sizeof(defaultLumaLevelTodQp_LumaChangePoints)/sizeof(int)); uint32_t lumaLevelToDeltaQPMode; #endif - + const int qpInVals[] = { 25, 33, 43 }; // qpInVal values used to derive the chroma QP mapping table used in VTM-5.0 + const int qpOutVals[] = { 25, 32, 37 }; // qpOutVal values used to derive the chroma QP mapping table used in VTM-5.0 + SMultiValueInput<int> cfg_qpInValCb (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, qpInVals, sizeof(qpInVals)/sizeof(int)); + SMultiValueInput<int> cfg_qpOutValCb (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, qpOutVals, sizeof(qpOutVals) / sizeof(int)); + const int zeroVector[] = { 0 }; + SMultiValueInput<int> cfg_qpInValCr (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1); + 
SMultiValueInput<int> cfg_qpOutValCr (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1); + SMultiValueInput<int> cfg_qpInValCbCr (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1); + SMultiValueInput<int> cfg_qpOutValCbCr (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1); const uint32_t defaultInputKneeCodes[3] = { 600, 800, 900 }; const uint32_t defaultOutputKneeCodes[3] = { 100, 250, 450 }; SMultiValueInput<uint32_t> cfg_kneeSEIInputKneePointValue (1, 999, 0, 999, defaultInputKneeCodes, sizeof(defaultInputKneeCodes )/sizeof(uint32_t)); @@ -723,12 +699,50 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) SMultiValueInput<bool> cfg_timeCodeSeiHoursFlag (0, 1, 0, MAX_TIMECODE_SEI_SETS); SMultiValueInput<int> cfg_timeCodeSeiTimeOffsetLength (0, 31, 0, MAX_TIMECODE_SEI_SETS); SMultiValueInput<int> cfg_timeCodeSeiTimeOffsetValue (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, MAX_TIMECODE_SEI_SETS); + SMultiValueInput<int> cfg_omniViewportSEIAzimuthCentre (-11796480, 11796479, 0, 15); + SMultiValueInput<int> cfg_omniViewportSEIElevationCentre ( -5898240, 5898240, 0, 15); + SMultiValueInput<int> cfg_omniViewportSEITiltCentre (-11796480, 11796479, 0, 15); + SMultiValueInput<uint32_t> cfg_omniViewportSEIHorRange ( 1, 23592960, 0, 15); + SMultiValueInput<uint32_t> cfg_omniViewportSEIVerRange ( 1, 11796480, 0, 15); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpTransformType (0, 7, 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<bool> cfg_rwpSEIRwpGuardBandFlag (0, 1, 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIProjRegionWidth (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIProjRegionHeight (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpSEIProjRegionTop (0, std::numeric_limits<uint32_t>::max(), 0, 
std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIProjRegionLeft (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIPackedRegionWidth (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIPackedRegionHeight (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIPackedRegionTop (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIPackedRegionLeft (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpLeftGuardBandWidth (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpRightGuardBandWidth (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpTopGuardBandHeight (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpBottomGuardBandHeight (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<bool> cfg_rwpSEIRwpGuardBandNotUsedForPredFlag (0, 1, 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_rwpSEIRwpGuardBandType (0, 7, 0, 4*std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_gcmpSEIFaceIndex (0, 5, 5, 6); + SMultiValueInput<uint32_t> cfg_gcmpSEIFaceRotation (0, 3, 5, 6); + SMultiValueInput<double> cfg_gcmpSEIFunctionCoeffU (0.0, 1.0, 5, 6); + SMultiValueInput<uint32_t> cfg_gcmpSEIFunctionUAffectedByVFlag (0, 1, 5, 6); + SMultiValueInput<double> cfg_gcmpSEIFunctionCoeffV (0.0, 1.0, 5, 6); + SMultiValueInput<uint32_t> cfg_gcmpSEIFunctionVAffectedByUFlag (0, 1, 5, 6); #if 
LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET const int defaultLadfQpOffset[3] = { 1, 0, 1 }; const int defaultLadfIntervalLowerBound[2] = { 350, 833 }; SMultiValueInput<int> cfg_LadfQpOffset ( -MAX_QP, MAX_QP, 2, MAX_LADF_INTERVALS, defaultLadfQpOffset, 3 ); SMultiValueInput<int> cfg_LadfIntervalLowerBound ( 0, std::numeric_limits<int>::max(), 1, MAX_LADF_INTERVALS - 1, defaultLadfIntervalLowerBound, 2 ); #endif + SMultiValueInput<unsigned> cfg_virtualBoundariesPosX (0, std::numeric_limits<uint32_t>::max(), 0, 3); + SMultiValueInput<unsigned> cfg_virtualBoundariesPosY (0, std::numeric_limits<uint32_t>::max(), 0, 3); + + SMultiValueInput<uint8_t> cfg_SubProfile(0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max()); + SMultiValueInput<uint32_t> cfg_subPicCtuTopLeftX(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_subPicCtuTopLeftY(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_subPicWidth(1, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_subPicHeight(1, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_subPicTreatedAsPicFlag(0, 1, 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_loopFilterAcrossSubpicEnabledFlag(0, 1, 0, MAX_NUM_SUB_PICS); + SMultiValueInput<uint32_t> cfg_subPicId(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS); int warnUnknowParameter = 0; #if ENABLE_TRACING @@ -765,7 +779,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("InputBitDepthC", m_inputBitDepth[CHANNEL_TYPE_CHROMA], 0, "As per InputBitDepth but for chroma component. (default:InputBitDepth)") ("OutputBitDepthC", m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0, "As per OutputBitDepth but for chroma component. 
(default: use luma output bit-depth)") ("MSBExtendedBitDepthC", m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA], 0, "As per MSBExtendedBitDepth but for chroma component. (default:MSBExtendedBitDepth)") - ("InternalBitDepthC", m_internalBitDepth[CHANNEL_TYPE_CHROMA], 0, "As per InternalBitDepth but for chroma component. (default:InternalBitDepth)") ("ExtendedPrecision", m_extendedPrecisionProcessingFlag, false, "Increased internal accuracies to support high bit depths (not valid in V1 profiles)") ("HighPrecisionPredictionWeighting", m_highPrecisionOffsetsEnabledFlag, false, "Use high precision option for weighted prediction (not valid in V1 profiles)") ("InputColourSpaceConvert", inputColourSpaceConvert, string(""), "Colour space conversion to apply to input video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(true)) @@ -803,6 +816,23 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("SummaryVerboseness", m_summaryVerboseness, 0u, "Specifies the level of the verboseness of the text output") ("Verbosity,v", m_verbosity, (int)VERBOSE, "Specifies the level of the verboseness") +#if JVET_O0756_CONFIG_HDRMETRICS || JVET_O0756_CALCULATE_HDRMETRICS + ( "WhitePointDeltaE1", m_whitePointDeltaE[0], 100.0, "1st reference white point value") + ( "WhitePointDeltaE2", m_whitePointDeltaE[1], 1000.0, "2nd reference white point value") + ( "WhitePointDeltaE3", m_whitePointDeltaE[2], 5000.0, "3rd reference white point value") + ( "MaxSampleValue", m_maxSampleValue, 10000.0, "Maximum sample value for floats") + ( "InputSampleRange", m_sampleRange, 0, "Sample Range") + ( "InputColorPrimaries", m_colorPrimaries, 1, "Input Color Primaries") + ( "EnableTFunctionLUT", m_enableTFunctionLUT, false, "Input Color Primaries") + ( "ChromaLocation", m_chromaLocation, 2, "Location of Chroma Samples") + ( "ChromaUpsampleFilter", m_chromaUPFilter, 1, "420 to 444 conversion filters") + ( "CropOffsetLeft", m_cropOffsetLeft, 0, "Crop Offset Left position") + ( 
"CropOffsetTop", m_cropOffsetTop, 0, "Crop Offset Top position") + ( "CropOffsetRight", m_cropOffsetRight, 0, "Crop Offset Right position") + ( "CropOffsetBottom", m_cropOffsetBottom, 0, "Crop Offset Bottom position") + ( "CalculateHdrMetrics", m_calculateHdrMetrics, false, "Enable HDR metric calculation") +#endif + //Field coding parameters ("FieldCoding", m_isField, false, "Signals if it's a field based coding") ("TopFieldFirst, Tff", m_isTopFieldFirst, false, "In case of field based coding, signals whether if it's a top field first or not") @@ -810,42 +840,61 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("HarmonizeGopFirstFieldCoupleEnabled", m_bHarmonizeGopFirstFieldCoupleEnabled, true, "Enables harmonization of Gop first field couple") // Profile and level - ("Profile", extendedProfile, NONE, "Profile name to use for encoding. Use main (for main), main10 (for main10), main-still-picture, main-RExt (for Range Extensions profile), any of the RExt specific profile names, or none") + ("Profile", extendedProfile, ExtendedProfileName::NONE, "Profile name to use for encoding. Use main_10, main_444_10, auto, or none") ("Level", m_level, Level::NONE, "Level limit to be used, eg 5.1, or none") ("Tier", m_levelTier, Level::MAIN, "Tier to use for interpretation of --Level (main or high only)") + ("SubProfile", cfg_SubProfile, cfg_SubProfile, "Sub-profile idc") + ("EnableDecodingParameterSet", m_decodingParameterSetEnabled, false, "Enables writing of Decoding Parameter Set") ("MaxBitDepthConstraint", m_bitDepthConstraint, 0u, "Bit depth to use for profile-constraint for RExt profiles. 0=automatically choose based upon other parameters") ("MaxChromaFormatConstraint", tmpConstraintChromaFormat, 0, "Chroma-format to use for the profile-constraint for RExt profiles. 
0=automatically choose based upon other parameters") ("IntraConstraintFlag", m_intraConstraintFlag, false, "Value of general_intra_constraint_flag to use for RExt profiles (not used if an explicit RExt sub-profile is specified)") - ("OnePictureOnlyConstraintFlag", m_onePictureOnlyConstraintFlag, false, "Value of general_one_picture_only_constraint_flag to use for RExt profiles (not used if an explicit RExt sub-profile is specified)") - ("LowerBitRateConstraintFlag", m_lowerBitRateConstraintFlag, true, "Value of general_lower_bit_rate_constraint_flag to use for RExt profiles") ("ProgressiveSource", m_progressiveSourceFlag, false, "Indicate that source is progressive") ("InterlacedSource", m_interlacedSourceFlag, false, "Indicate that source is interlaced") ("NonPackedSource", m_nonPackedConstraintFlag, false, "Indicate that source does not contain frame packing") ("FrameOnly", m_frameOnlyConstraintFlag, false, "Indicate that the bitstream contains only frames") ("CTUSize", m_uiCTUSize, 128u, "CTUSize (specifies the CTU size if QTBT is on) [default: 128]") + ("SubPicPresentFlag", m_subPicPresentFlag, false, "equal to 1 specifies that subpicture parameters are present in in the SPS RBSP syntax") + ("NumSubPics", m_numSubPics, 0u, "specifies the number of subpictures") + ("SubPicCtuTopLeftX", cfg_subPicCtuTopLeftX, cfg_subPicCtuTopLeftX, "specifies horizontal position of top left CTU of i-th subpicture in unit of CtbSizeY") + ("SubPicCtuTopLeftY", cfg_subPicCtuTopLeftY, cfg_subPicCtuTopLeftY, "specifies vertical position of top left CTU of i-th subpicture in unit of CtbSizeY") + ("SubPicWidth", cfg_subPicWidth, cfg_subPicWidth, "specifies the width of the i-th subpicture in units of CtbSizeY") + ("SubPicHeight", cfg_subPicHeight, cfg_subPicHeight, "specifies the height of the i-th subpicture in units of CtbSizeY") + ("SubPicTreatedAsPicFlag", cfg_subPicTreatedAsPicFlag, cfg_subPicTreatedAsPicFlag, "equal to 1 specifies that the i-th subpicture of each coded picture in 
the CLVS is treated as a picture in the decoding process excluding in-loop filtering operations") + ("LoopFilterAcrossSubpicEnabledFlag", cfg_loopFilterAcrossSubpicEnabledFlag, cfg_loopFilterAcrossSubpicEnabledFlag, "equal to 1 specifies that in-loop filtering operations may be performed across the boundaries of the i-th subpicture in each coded picture in the CLVS") + ("SubPicIdPresentFlag", m_subPicIdPresentFlag, false, "equal to 1 specifies that subpicture ID mapping is present in the SPS") + ("SubPicIdSignallingPresentFlag", m_subPicIdSignallingPresentFlag, false, "equal to 1 specifies that subpicture ID mapping is signalled in the SPS") + ("SubPicIdLen", m_subPicIdLen, 0u, "specifies the number of bits used to represent the syntax element sps_subpic_id[ i ]. ") + ("SubPicId", cfg_subPicId, cfg_subPicId, "specifies that subpicture ID of the i-th subpicture") ("EnablePartitionConstraintsOverride", m_SplitConsOverrideEnabledFlag, true, "Enable partition constraints override") ("MinQTISlice", m_uiMinQT[0], 8u, "MinQTISlice") ("MinQTLumaISlice", m_uiMinQT[0], 8u, "MinQTLumaISlice") ("MinQTChromaISlice", m_uiMinQT[2], 4u, "MinQTChromaISlice") ("MinQTNonISlice", m_uiMinQT[1], 8u, "MinQTNonISlice") - ("MaxBTDepth", m_uiMaxBTDepth, 3u, "MaxBTDepth") - ("MaxBTDepthI", m_uiMaxBTDepthI, 3u, "MaxBTDepthI") - ("MaxBTDepthISliceL", m_uiMaxBTDepthI, 3u, "MaxBTDepthISliceL") - ("MaxBTDepthISliceC", m_uiMaxBTDepthIChroma, 3u, "MaxBTDepthISliceC") + ("MaxMTTHierarchyDepth", m_uiMaxMTTHierarchyDepth, 3u, "MaxMTTHierarchyDepth") + ("MaxMTTHierarchyDepthI", m_uiMaxMTTHierarchyDepthI, 3u, "MaxMTTHierarchyDepthI") + ("MaxMTTHierarchyDepthISliceL", m_uiMaxMTTHierarchyDepthI, 3u, "MaxMTTHierarchyDepthISliceL") + ("MaxMTTHierarchyDepthISliceC", m_uiMaxMTTHierarchyDepthIChroma, 3u, "MaxMTTHierarchyDepthISliceC") ("DualITree", m_dualTree, false, "Use separate QTBT trees for intra slice luma and chroma channel types") - ("SubPuMvp", m_SubPuMvpMode, 0, "Enable Sub-PU temporal motion vector 
prediction (0:off, 1:ATMVP, 2:STMVP, 3:ATMVP+STMVP) [default: off]") - ("Affine", m_Affine, false, "Enable affine prediction (0:off, 1:on) [default: off]") - ("AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" ) - ("BIO", m_BIO, false, "Enable bi-directional optical flow") + ( "LFNST", m_LFNST, false, "Enable LFNST (0:off, 1:on) [default: off]" ) + ( "FastLFNST", m_useFastLFNST, false, "Fast methods for LFNST" ) + ("SubPuMvp", m_SubPuMvpMode, 0, "Enable Sub-PU temporal motion vector prediction (0:off, 1:ATMVP, 2:STMVP, 3:ATMVP+STMVP) [default: off]") + ("MMVD", m_MMVD, true, "Enable Merge mode with Motion Vector Difference (0:off, 1:on) [default: 1]") + ("Affine", m_Affine, false, "Enable affine prediction (0:off, 1:on) [default: off]") + ("AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" ) + ("PROF", m_PROF, false, "Enable Prediction refinement with optical flow for affine mode (0:off, 1:on) [default: off]") + ("BIO", m_BIO, false, "Enable bi-directional optical flow") ("IMV", m_ImvMode, 1, "Adaptive MV precision Mode (IMV)\n" "\t0: disabled\n" - "\t1: enabled (Full-Pel and 4-PEL)\n") + "\t1: enabled (1/2-Pel, Full-Pel and 4-PEL)\n") ("IMV4PelFast", m_Imv4PelFast, 1, "Fast 4-Pel Adaptive MV precision Mode 0:disabled, 1:enabled) [default: 1]") ("LMChroma", m_LMChroma, 1, " LMChroma prediction " "\t0: Disable LMChroma\n" "\t1: Enable LMChroma\n") - ("CclmCollocatedChroma", m_cclmCollocatedChromaFlag, false, "Specifies the location of the top-left downsampled luma sample in cross-component linear model intra prediction relative to the top-left luma sample\n" + ("HorCollocatedChroma", m_horCollocatedChromaFlag, true, "Specifies location of a chroma sample relatively to the luma sample in horizontal direction in the reference picture resampling\n" + "\t0: horizontally shifted by 0.5 units of luma samples\n" + "\t1: collocated (default)\n") + ("VerCollocatedChroma", 
m_verCollocatedChromaFlag, false, "Specifies location of a chroma sample relatively to the luma sample in vertical direction in the cross-component linear model intra prediction and the reference picture resampling\n" "\t0: horizontally co-sited, vertically shifted by 0.5 units of luma samples\n" "\t1: collocated\n") ("MTS", m_MTS, 0, "Multiple Transform Set (MTS)\n" @@ -857,16 +906,19 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("MTSInterMaxCand", m_MTSInterMaxCand, 4, "Number of additional candidates to test in encoder search for MTS in inter slices\n") ("MTSImplicit", m_MTSImplicit, 0, "Enable implicit MTS (when explicit MTS is off)\n") ( "SBT", m_SBT, false, "Enable Sub-Block Transform for inter blocks\n" ) + ( "SBTFast64WidthTh", m_SBTFast64WidthTh, 1920, "Picture width threshold for testing size-64 SBT in RDO (now for HD and above sequences)\n") + ( "ISP", m_ISP, false, "Enable Intra Sub-Partitions\n" ) + ("SMVD", m_SMVD, false, "Enable Symmetric MVD\n") ("CompositeLTReference", m_compositeRefEnabled, false, "Enable Composite Long Term Reference Frame") - ("GBi", m_GBi, false, "Enable Generalized Bi-prediction(GBi)") - ("GBiFast", m_GBiFast, false, "Fast methods for Generalized Bi-prediction(GBi)\n") + ("BCW", m_bcw, false, "Enable Generalized Bi-prediction(Bcw)") + ("BcwFast", m_BcwFast, false, "Fast methods for Generalized Bi-prediction(Bcw)\n") #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET ("LADF", m_LadfEnabed, false, "Luma adaptive deblocking filter QP Offset(L0414)") ("LadfNumIntervals", m_LadfNumIntervals, 3, "LADF number of intervals (2-5, inclusive)") ("LadfQpOffset", cfg_LadfQpOffset, cfg_LadfQpOffset, "LADF QP offset") ("LadfIntervalLowerBound", cfg_LadfIntervalLowerBound, cfg_LadfIntervalLowerBound, "LADF lower bound for 2nd lowest interval") #endif - ("MHIntra", m_MHIntra, false, "Enable MHIntra mode") + ("CIIP", m_ciip, false, "Enable CIIP mode") ("Triangle", m_Triangle, false, "Enable triangular shape motion vector prediction (0:off, 
1:on)") ("HashME", m_HashME, false, "Enable hash motion estimation (0:off, 1:on)") @@ -874,6 +926,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("AffineAmvr", m_AffineAmvr, false, "Eanble AMVR for affine inter mode") ("AffineAmvrEncOpt", m_AffineAmvrEncOpt, false, "Enable encoder optimization of affine AMVR") ("DMVR", m_DMVR, false, "Decoder-side Motion Vector Refinement") + ("MmvdDisNum", m_MmvdDisNum, 8, "Number of MMVD Distance Entries") + ("ColorTransform", m_useColorTrans, false, "Enable the color transform") + ("PLT", m_PLTMode, 0u, "PLTMode (0x1:enabled, 0x0:disabled) [default: disabled]") + ("JointCbCr", m_JointCbCrMode, false, "Enable joint coding of chroma residuals (JointCbCr, 0:off, 1:on)") ( "IBC", m_IBCMode, 0u, "IBCMode (0x1:enabled, 0x0:disabled) [default: disabled]") ( "IBCLocalSearchRangeX", m_IBCLocalSearchRangeX, 128u, "Search range of IBC local search in x direction") ( "IBCLocalSearchRangeY", m_IBCLocalSearchRangeY, 128u, "Search range of IBC local search in y direction") @@ -886,9 +942,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("WrapAroundOffset", m_wrapAroundOffset, 0u, "Offset in luma samples used for computing the horizontal wrap-around position") // ADD_NEW_TOOL : (encoder app) add parsing parameters here + ("LoopFilterAcrossVirtualBoundariesDisabledFlag", m_loopFilterAcrossVirtualBoundariesDisabledFlag, false, "Disable in-loop filtering operations across the virtual boundaries (0:off, 1:on) [default: off]") + ("NumVerVirtualBoundaries", m_numVerVirtualBoundaries, 0u, "Number of vertical virtual boundaries (0-3, inclusive)") + ("NumHorVirtualBoundaries", m_numHorVirtualBoundaries, 0u, "Number of horizontal virtual boundaries (0-3, inclusive)") + ("VirtualBoundariesPosX", cfg_virtualBoundariesPosX, cfg_virtualBoundariesPosX, "Locations of the vertical virtual boundaries in units of luma samples") + ("VirtualBoundariesPosY", cfg_virtualBoundariesPosY, cfg_virtualBoundariesPosY, "Locations of the horizontal virtual 
boundaries in units of luma samples") ("EncDbOpt", m_encDbOpt, false, "Encoder optimization with deblocking filter") - ("LumaReshapeEnable", m_lumaReshapeEnable, false, "Enable Reshaping for Luma Channel") - ("ReshapeSignalType", m_reshapeSignalType, 0u, "Input signal type: 0: SDR, 1:PQ, 2:HLG") + ("LMCSEnable", m_lmcsEnabled, false, "Enable LMCS (luma mapping with chroma scaling") + ("LMCSSignalType", m_reshapeSignalType, 0u, "Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG") + ("LMCSUpdateCtrl", m_updateCtrl, 0, "LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP") + ("LMCSAdpOption", m_adpOption, 0, "LMCS adaptation options: 0:automatic(default)," + "1: rsp both (CW66 for QP<=22), 2: rsp TID0 (for all QP)," + "3: rsp inter(CW66 for QP<=22), 4: rsp inter(for all QP).") + ("LMCSInitialCW", m_initialCW, 0u, "LMCS initial total codeword (0~1023) when LMCSAdpOption > 0") + ("LMCSOffset", m_CSoffset, 0, "LMCS chroma residual scaling offset") ("IntraCMD", m_intraCMD, 0u, "IntraChroma MD: 0: none, 1:fixed to default wPSNR weight") ("LCTUFast", m_useFastLCTU, false, "Fast methods for large CTU") ("FastMrg", m_useFastMrg, false, "Fast methods for inter merge") @@ -896,6 +963,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("AMaxBT", m_useAMaxBT, false, "Adaptive maximal BT-size") ("E0023FastEnc", m_e0023FastEnc, true, "Fast encoding setting for QTBT (proposal E0023)") ("ContentBasedFastQtbt", m_contentBasedFastQtbt, false, "Signal based QTBT speed-up") + ("UseNonLinearAlfLuma", m_useNonLinearAlfLuma, true, "Non-linear adaptive loop filters for Luma Channel") + ("UseNonLinearAlfChroma", m_useNonLinearAlfChroma, true, "Non-linear adaptive loop filters for Chroma Channels") + ("MaxNumAlfAlternativesChroma", m_maxNumAlfAlternativesChroma, + (unsigned)MAX_NUM_ALF_ALTERNATIVES_CHROMA, std::string("Maximum number of alternative Chroma filters (1-") + std::to_string(MAX_NUM_ALF_ALTERNATIVES_CHROMA) + std::string (", inclusive)") ) + ("MRL", m_MRL, false, "Enable MRL 
(multiple reference line intra prediction)") + ("MIP", m_MIP, true, "Enable MIP (matrix-based intra prediction)") + ("FastMIP", m_useFastMIP, false, "Fast encoder search for MIP (matrix-based intra prediction)") + ("FastLocalDualTreeMode", m_fastLocalDualTreeMode, 0, "Fast intra pass coding for local dual-tree in intra coding region, 0: off, 1: use threshold, 2: one intra mode only") // Unit definition parameters ("MaxCUWidth", m_uiMaxCUWidth, 64u) ("MaxCUHeight", m_uiMaxCUHeight, 64u) @@ -904,20 +979,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("MaxCUSize,s", m_uiMaxCUHeight, 64u, "Maximum CU size") ("MaxPartitionDepth,h", m_uiMaxCUDepth, 4u, "CU depth") -#if MAX_TB_SIZE_SIGNALLING ("Log2MaxTbSize", m_log2MaxTbSize, 6, "Maximum transform block size in logarithm base 2 (Default: 6)") -#endif // Coding structure paramters ("IntraPeriod,-ip", m_iIntraPeriod, -1, "Intra period in frames, (-1: only first frame)") ("DecodingRefreshType,-dr", m_iDecodingRefreshType, 0, "Intra refresh type (0:none 1:CRA 2:IDR 3:RecPointSEI)") ("GOPSize,g", m_iGOPSize, 1, "GOP size of temporal structure") -#if JCTVC_Y0038_PARAMS + ("DRAPPeriod", m_drapPeriod, 0, "DRAP period in frames (0: disable Dependent RAP indication SEI messages)") ("ReWriteParamSets", m_rewriteParamSets, false, "Enable rewriting of Parameter sets before every (intra) random access point") - //Alias with same name as in HM - ("ReWriteParamSetsFlag", m_rewriteParamSets, false, "Alias for ReWriteParamSets") -#endif - + ("IDRRefParamList", m_idrRefParamList, false, "Enable indication of reference picture list syntax elements in slice headers of IDR pictures") // motion search options ("DisableIntraInInter", m_bDisableIntraPUsInInterSlices, false, "Flag to disable intra PUs in inter slices") ("FastSearch", tmpMotionEstimationSearchMethod, int(MESEARCH_DIAMOND), "0:Full search 1:Diamond 2:Selective 3:Enhanced Diamond") @@ -967,11 +1037,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) 
("LumaLevelToDeltaQPMappingLuma", cfg_lumaLeveltoDQPMappingLuma, cfg_lumaLeveltoDQPMappingLuma, "Luma to Delta QP Mapping - luma thresholds") ("LumaLevelToDeltaQPMappingDQP", cfg_lumaLeveltoDQPMappingQP, cfg_lumaLeveltoDQPMappingQP, "Luma to Delta QP Mapping - DQP values") #endif - + ("UseIdentityTableForNon420Chroma", m_useIdentityTableForNon420Chroma, true, "True: Indicates that 422/444 chroma uses identity chroma QP mapping tables; False: explicit Qp table may be specified in config") + ("SameCQPTablesForAllChroma", m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag, true, "0: Different tables for Cb, Cr and joint Cb-Cr components, 1 (default): Same tables for all three chroma components") + ("QpInValCb", cfg_qpInValCb, cfg_qpInValCb, "Input coordinates for the QP table for Cb component") + ("QpOutValCb", cfg_qpOutValCb, cfg_qpOutValCb, "Output coordinates for the QP table for Cb component") + ("QpInValCr", cfg_qpInValCr, cfg_qpInValCr, "Input coordinates for the QP table for Cr component") + ("QpOutValCr", cfg_qpOutValCr, cfg_qpOutValCr, "Output coordinates for the QP table for Cr component") + ("QpInValCbCr", cfg_qpInValCbCr, cfg_qpInValCbCr, "Input coordinates for the QP table for joint Cb-Cr component") + ("QpOutValCbCr", cfg_qpOutValCbCr, cfg_qpOutValCbCr, "Output coordinates for the QP table for joint Cb-Cr component") ("CbQpOffset,-cbqpofs", m_cbQpOffset, 0, "Chroma Cb QP Offset") ("CrQpOffset,-crqpofs", m_crQpOffset, 0, "Chroma Cr QP Offset") ("CbQpOffsetDualTree", m_cbQpOffsetDualTree, 0, "Chroma Cb QP Offset for dual tree") ("CrQpOffsetDualTree", m_crQpOffsetDualTree, 0, "Chroma Cr QP Offset for dual tree") + ("CbCrQpOffset,-cbcrqpofs", m_cbCrQpOffset, -1, "QP Offset for joint Cb-Cr mode") + ("CbCrQpOffsetDualTree", m_cbCrQpOffsetDualTree, 0, "QP Offset for joint Cb-Cr mode in dual tree") #if ER_CHROMA_QP_WCG_PPS ("WCGPPSEnable", m_wcgChromaQpControl.enabled, false, "1: Enable the WCG PPS chroma modulation scheme. 
0 (default) disabled") ("WCGPPSCbQpScale", m_wcgChromaQpControl.chromaCbQpScale, 1.0, "WCG PPS Chroma Cb QP Scale") @@ -1017,6 +1096,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("TransformSkip", m_useTransformSkip, false, "Intra transform skipping") ("TransformSkipFast", m_useTransformSkipFast, false, "Fast encoder search for transform skipping, winner takes it all mode.") ("TransformSkipLog2MaxSize", m_log2MaxTransformSkipBlockSize, 5U, "Specify transform-skip maximum size. Minimum 2, Maximum 5. (not valid in V1 profiles)") + ("ChromaTS", m_useChromaTS, false, "Enable encoder search of chromaTS") + ("BDPCM", m_useBDPCM, 0, "BDPCM (0:off, 1:lumaonly, 2:lumachroma") ("ISPFast", m_useFastISP, false, "Fast encoder search for ISP") ("ImplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT], false, "Enable implicitly signalled residual DPCM for intra (also known as sample-adaptive intra predict) (not valid in V1 profiles)") ("ExplicitResidualDPCM", m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT], false, "Enable explicitly signalled residual DPCM for inter (not valid in V1 profiles)") @@ -1030,76 +1111,48 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("SaoEncodingRateChroma", m_saoEncodingRateChroma, 0.5, "The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). 
If <=0, use results for luma") ("MaxNumOffsetsPerPic", m_maxNumOffsetsPerPic, 2048, "Max number of SAO offset per picture (Default: 2048)") ("SAOLcuBoundary", m_saoCtuBoundary, false, "0: right/bottom CTU boundary areas skipped from SAO parameter estimation, 1: non-deblocked pixels are used for those areas") -#if K0238_SAO_GREEDY_MERGE_ENCODING ("SAOGreedyEnc", m_saoGreedyMergeEnc, false, "SAO greedy merge encoding algorithm") -#endif -#if HEVC_TILES_WPP - ("SliceMode", tmpSliceMode, int(NO_SLICES), "0: Disable all Recon slice limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes, 3:specify tiles per dependent slice") - ("SliceArgument", m_sliceArgument, 0, "Depending on SliceMode being:" - "\t1: max number of CTUs per slice" - "\t2: max number of bytes per slice" - "\t3: max number of tiles per slice") -#else - ("SliceMode", tmpSliceMode, int(NO_SLICES), "0: Disable all Recon slice limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes)") - ("SliceArgument", m_sliceArgument, 0, "Depending on SliceMode being:" - "\t1: max number of CTUs per slice" - "\t2: max number of bytes per slice") -#endif -#if HEVC_DEPENDENT_SLICES - ("SliceSegmentMode", tmpSliceSegmentMode, int(NO_SLICES), "0: Disable all slice segment limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes, 3:specify tiles per dependent slice") - ("SliceSegmentArgument", m_sliceSegmentArgument, 0, "Depending on SliceSegmentMode being:" - "\t1: max number of CTUs per slice segment" - "\t2: max number of bytes per slice segment" - "\t3: max number of tiles per slice segment") -#endif - ("LFCrossSliceBoundaryFlag", m_bLFCrossSliceBoundaryFlag, true) - - ("ConstrainedIntraPred", m_bUseConstrainedIntraPred, false, "Constrained Intra Prediction") + ("EnablePicPartitioning", m_picPartitionFlag, false, "Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)") + ("TileColumnWidthArray", cfgTileColumnWidth, cfgTileColumnWidth, "Tile column widths in units of 
CTUs. Last column width in list will be repeated uniformly to cover any remaining picture width") + ("TileRowHeightArray", cfgTileRowHeight, cfgTileRowHeight, "Tile row heights in units of CTUs. Last row height in list will be repeated uniformly to cover any remaining picture height") + ("RasterScanSlices", m_rasterSliceFlag, false, "Indicates if using raster-scan or rectangular slices (0: rectangular, 1: raster-scan)") + ("RectSlicePositions", cfgRectSlicePos, cfgRectSlicePos, "Rectangular slice positions. List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address") + ("RectSliceFixedWidth", m_rectSliceFixedWidth, 0, "Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead)") + ("RectSliceFixedHeight", m_rectSliceFixedHeight, 0, "Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead)") + ("RasterSliceSizes", cfgRasterSliceSize, cfgRasterSliceSize, "Raster-scan slice sizes in units of tiles. 
Last size in list will be repeated uniformly to cover any remaining tiles in the picture") + ("DisableLoopFilterAcrossTiles", m_disableLFCrossTileBoundaryFlag, false, "Loop filtering applied across tile boundaries or not (0: filter across tile boundaries 1: do not filter across tile boundaries)") + ("DisableLoopFilterAcrossSlices", m_disableLFCrossSliceBoundaryFlag, false, "Loop filtering applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)") + ("EnableSubPicPartitioning", m_subPicPartitionFlag, true, "Enable Sub-Picture partitioning (0: single slice per sub-picture, 1: multiple slices per sub-picture can be used)") ("FastUDIUseMPMEnabled", m_bFastUDIUseMPMEnabled, true, "If enabled, adapt intra direction search, accounting for MPM") ("FastMEForGenBLowDelayEnabled", m_bFastMEForGenBLowDelayEnabled, true, "If enabled use a fast ME for generalised B Low Delay slices") ("UseBLambdaForNonKeyLowDelayPictures", m_bUseBLambdaForNonKeyLowDelayPictures, true, "Enables use of B-Lambda for non-key low-delay pictures") - ("PCMEnabledFlag", m_usePCM, false) - ("PCMLog2MaxSize", m_pcmLog2MaxSize, 5u) - ("PCMLog2MinSize", m_uiPCMLog2MinSize, 3u) - - ("PCMInputBitDepthFlag", m_bPCMInputBitDepthFlag, true) - ("PCMFilterDisableFlag", m_bPCMFilterDisableFlag, false) ("IntraReferenceSmoothing", m_enableIntraReferenceSmoothing, true, "0: Disable use of intra reference smoothing (not valid in V1 profiles). 
1: Enable use of intra reference smoothing (same as V1)") ("WeightedPredP,-wpP", m_useWeightedPred, false, "Use weighted prediction in P slices") ("WeightedPredB,-wpB", m_useWeightedBiPred, false, "Use weighted (bidirectional) prediction in B slices") ("WeightedPredMethod,-wpM", tmpWeightedPredictionMethod, int(WP_PER_PICTURE_WITH_SIMPLE_DC_COMBINED_COMPONENT), "Weighted prediction method") - ("Log2ParallelMergeLevel", m_log2ParallelMergeLevel, 2u, "Parallel merge estimation region") -#if HEVC_TILES_WPP - //deprecated copies of renamed tile parameters - ("UniformSpacingIdc", m_tileUniformSpacingFlag, false, "deprecated alias of TileUniformSpacing") - ("ColumnWidthArray", cfg_ColumnWidth, cfg_ColumnWidth, "deprecated alias of TileColumnWidthArray") - ("RowHeightArray", cfg_RowHeight, cfg_RowHeight, "deprecated alias of TileRowHeightArray") - - ("TileUniformSpacing", m_tileUniformSpacingFlag, false, "Indicates that tile columns and rows are distributed uniformly") - ("NumTileColumnsMinus1", m_numTileColumnsMinus1, 0, "Number of tile columns in a picture minus 1") - ("NumTileRowsMinus1", m_numTileRowsMinus1, 0, "Number of rows in a picture minus 1") - ("TileColumnWidthArray", cfg_ColumnWidth, cfg_ColumnWidth, "Array containing tile column width values in units of CTU") - ("TileRowHeightArray", cfg_RowHeight, cfg_RowHeight, "Array containing tile row height values in units of CTU") - ("LFCrossTileBoundaryFlag", m_bLFCrossTileBoundaryFlag, true, "1: cross-tile-boundary loop filtering. 0:non-cross-tile-boundary loop filtering") ("WaveFrontSynchro", m_entropyCodingSyncEnabledFlag, false, "0: entropy coding sync disabled; 1 entropy coding sync enabled") -#endif -#if HEVC_USE_SCALING_LISTS ("ScalingList", m_useScalingListId, SCALING_LIST_OFF, "0/off: no scaling list, 1/default: default scaling lists, 2/file: scaling lists specified in ScalingListFile") ("ScalingListFile", m_scalingListFileName, string(""), "Scaling list file name. 
Use an empty string to produce help.") -#endif - ("DepQuant", m_depQuantEnabledFlag, true ) -#if HEVC_USE_SIGN_HIDING - ("SignHideFlag,-SBH", m_signDataHidingEnabledFlag, false ) -#endif + ("DisableScalingMatrixForLFNST", m_disableScalingMatrixForLfnstBlks, true, "Disable scaling matrices, when enabled, for LFNST-coded blocks") + ("DepQuant", m_depQuantEnabledFlag, true, "Enable dependent quantization (Default: 1)" ) + ("SignHideFlag,-SBH", m_signDataHidingEnabledFlag, false, "Enable sign hiding" ) ("MaxNumMergeCand", m_maxNumMergeCand, 5u, "Maximum number of merge candidates") ("MaxNumAffineMergeCand", m_maxNumAffineMergeCand, 5u, "Maximum number of affine merge candidates") - /* Misc. */ + ("MaxNumTriangleCand", m_maxNumTriangleCand, 5u, "Maximum number of triangle candidates") + ("MaxNumIBCMergeCand", m_maxNumIBCMergeCand, 6u, "Maximum number of IBC merge candidates") + /* Misc. */ ("SEIDecodedPictureHash,-dph", tmpDecodedPictureHashSEIMappedType, 0, "Control generation of decode picture hash SEI messages\n" "\t3: checksum\n" "\t2: CRC\n" "\t1: use MD5\n" "\t0: disable") ("TMVPMode", m_TMVPModeId, 1, "TMVP mode 0: TMVP disable for all slices. 
1: TMVP enable for all slices (default) 2: TMVP enable for certain slices only") + ("PPSorSliceMode", m_PPSorSliceMode, 0, "Enable signalling certain parameters either in PPS or per slice\n" + "\tmode 0: Always per slice (default), 1: RA settings, 2: LDB settings, 3: LDP settings") + ("SliceLevelRpl", m_sliceLevelRpl, true, "Code reference picture lists in slice headers rather than picture header.") + ("SliceLevelDblk", m_sliceLevelDblk, true, "Code deblocking filter parameters in slice headers rather than picture header.") + ("SliceLevelSao", m_sliceLevelSao, true, "Code SAO parameters in slice headers rather than picture header.") + ("SliceLevelAlf", m_sliceLevelAlf, true, "Code ALF parameters in slice headers rather than picture header.") ("FEN", tmpFastInterSearchMode, int(FASTINTERSEARCH_DISABLED), "fast encoder setting") ("ECU", m_bUseEarlyCU, false, "Early CU setting") ("FDM", m_useFastDecisionForMerge, true, "Fast decision for Merge RD Cost") @@ -1117,26 +1170,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ( "RCCpbSize", m_RCCpbSize, 0u, "Rate control: CPB size" ) ( "RCInitialCpbFullness", m_RCInitialCpbFullness, 0.9, "Rate control: initial CPB fullness" ) #endif - ("TransquantBypassEnable", m_TransquantBypassEnabledFlag, false, "transquant_bypass_enabled_flag indicator in PPS") - ("TransquantBypassEnableFlag", m_TransquantBypassEnabledFlag, false, "deprecated and obsolete, but still needed for compatibility reasons") - ("CUTransquantBypassFlagForce", m_CUTransquantBypassFlagForce, false, "Force transquant bypass mode, when transquant_bypass_enabled_flag is enabled") ("CostMode", m_costMode, COST_STANDARD_LOSSY, "Use alternative cost functions: choose between 'lossy', 'sequence_level_lossless', 'lossless' (which forces QP to " MACRO_TO_STRING(LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP) ") and 'mixed_lossless_lossy' (which used QP'=" MACRO_TO_STRING(LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME) " for pre-estimates of transquant-bypass 
blocks).") ("RecalculateQPAccordingToLambda", m_recalculateQPAccordingToLambda, false, "Recalculate QP values according to lambda values. Do not suggest to be enabled in all intra case") -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - ("StrongIntraSmoothing,-sis", m_useStrongIntraSmoothing, true, "Enable strong intra smoothing for 32x32 blocks") -#endif +#if HEVC_SEI ("SEIActiveParameterSets", m_activeParameterSetsSEIEnabled, 0, "Enable generation of active parameter sets SEI messages"); opts.addOptions() +#endif + ("HrdParametersPresent,-hrd", m_hrdParametersPresentFlag, false, "Enable generation of hrd_parameters()") ("VuiParametersPresent,-vui", m_vuiParametersPresentFlag, false, "Enable generation of vui_parameters()") ("AspectRatioInfoPresent", m_aspectRatioInfoPresentFlag, false, "Signals whether aspect_ratio_idc is present") ("AspectRatioIdc", m_aspectRatioIdc, 0, "aspect_ratio_idc") ("SarWidth", m_sarWidth, 0, "horizontal size of the sample aspect ratio") ("SarHeight", m_sarHeight, 0, "vertical size of the sample aspect ratio") - ("OverscanInfoPresent", m_overscanInfoPresentFlag, false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n") - ("OverscanAppropriate", m_overscanAppropriateFlag, false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n") - ("VideoSignalTypePresent", m_videoSignalTypePresentFlag, false, "Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present") - ("VideoFormat", m_videoFormat, 5, "Indicates representation of pictures") - ("VideoFullRange", m_videoFullRangeFlag, false, "Indicates the black level and range of luma and chroma signals") ("ColourDescriptionPresent", m_colourDescriptionPresentFlag, false, "Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present") ("ColourPrimaries", m_colourPrimaries, 2, "Indicates chromaticity coordinates of the source 
primaries") ("TransferCharacteristics", m_transferCharacteristics, 2, "Indicates the opto-electronic transfer characteristics of the source") @@ -1144,70 +1189,19 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("ChromaLocInfoPresent", m_chromaLocInfoPresentFlag, false, "Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present") ("ChromaSampleLocTypeTopField", m_chromaSampleLocTypeTopField, 0, "Specifies the location of chroma samples for top field") ("ChromaSampleLocTypeBottomField", m_chromaSampleLocTypeBottomField, 0, "Specifies the location of chroma samples for bottom field") - ("NeutralChromaIndication", m_neutralChromaIndicationFlag, false, "Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1)") - ("DefaultDisplayWindowFlag", m_defaultDisplayWindowFlag, false, "Indicates the presence of the Default Window parameters") - ("DefDispWinLeftOffset", m_defDispWinLeftOffset, 0, "Specifies the left offset of the default display window from the conformance window") - ("DefDispWinRightOffset", m_defDispWinRightOffset, 0, "Specifies the right offset of the default display window from the conformance window") - ("DefDispWinTopOffset", m_defDispWinTopOffset, 0, "Specifies the top offset of the default display window from the conformance window") - ("DefDispWinBottomOffset", m_defDispWinBottomOffset, 0, "Specifies the bottom offset of the default display window from the conformance window") - ("FrameFieldInfoPresentFlag", m_frameFieldInfoPresentFlag, false, "Indicates that pic_struct and field coding related values are present in picture timing SEI messages") - ("PocProportionalToTimingFlag", m_pocProportionalToTimingFlag, false, "Indicates that the POC value is proportional to the output time w.r.t. 
first picture in CVS") - ("NumTicksPocDiffOneMinus1", m_numTicksPocDiffOneMinus1, 0, "Number of ticks minus 1 that for a POC difference of one") - ("BitstreamRestriction", m_bitstreamRestrictionFlag, false, "Signals whether bitstream restriction parameters are present") -#if HEVC_TILES_WPP - ("TilesFixedStructure", m_tilesFixedStructureFlag, false, "Indicates that each active picture parameter set has the same values of the syntax elements related to tiles") -#endif - ("MotionVectorsOverPicBoundaries", m_motionVectorsOverPicBoundariesFlag, false, "Indicates that no samples outside the picture boundaries are used for inter prediction") - ("MaxBytesPerPicDenom", m_maxBytesPerPicDenom, 2, "Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture") - ("MaxBitsPerMinCuDenom", m_maxBitsPerMinCuDenom, 1, "Indicates an upper bound for the number of bits of coding_unit() data") - ("Log2MaxMvLengthHorizontal", m_log2MaxMvLengthHorizontal, 15, "Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units") - ("Log2MaxMvLengthVertical", m_log2MaxMvLengthVertical, 15, "Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units"); + ("ChromaSampleLocType", m_chromaSampleLocType, 0, "Specifies the location of chroma samples for progressive content") + ("OverscanInfoPresent", m_overscanInfoPresentFlag, false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n") + ("OverscanAppropriate", m_overscanAppropriateFlag, false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n") + ("VideoFullRange", m_videoFullRangeFlag, false, "Indicates the black level and range of luma and chroma signals"); opts.addOptions() +#if HEVC_SEI ("SEIColourRemappingInfoFileRoot,-cri", m_colourRemapSEIFileRoot, string(""), "Colour Remapping Information SEI parameters root file name (wo num ext)") 
("SEIRecoveryPoint", m_recoveryPointSEIEnabled, false, "Control generation of recovery point SEI messages") +#endif ("SEIBufferingPeriod", m_bufferingPeriodSEIEnabled, false, "Control generation of buffering period SEI messages") ("SEIPictureTiming", m_pictureTimingSEIEnabled, false, "Control generation of picture timing SEI messages") - ("SEIToneMappingInfo", m_toneMappingInfoSEIEnabled, false, "Control generation of Tone Mapping SEI messages") - ("SEIToneMapId", m_toneMapId, 0, "Specifies Id of Tone Mapping SEI message for a given session") - ("SEIToneMapCancelFlag", m_toneMapCancelFlag, false, "Indicates that Tone Mapping SEI message cancels the persistence or follows") - ("SEIToneMapPersistenceFlag", m_toneMapPersistenceFlag, true, "Specifies the persistence of the Tone Mapping SEI message") - ("SEIToneMapCodedDataBitDepth", m_toneMapCodedDataBitDepth, 8, "Specifies Coded Data BitDepth of Tone Mapping SEI messages") - ("SEIToneMapTargetBitDepth", m_toneMapTargetBitDepth, 8, "Specifies Output BitDepth of Tone mapping function") - ("SEIToneMapModelId", m_toneMapModelId, 0, "Specifies Model utilized for mapping coded data into target_bit_depth range\n" - "\t0: linear mapping with clipping\n" - "\t1: sigmoidal mapping\n" - "\t2: user-defined table mapping\n" - "\t3: piece-wise linear mapping\n" - "\t4: luminance dynamic range information ") - ("SEIToneMapMinValue", m_toneMapMinValue, 0, "Specifies the minimum value in mode 0") - ("SEIToneMapMaxValue", m_toneMapMaxValue, 1023, "Specifies the maximum value in mode 0") - ("SEIToneMapSigmoidMidpoint", m_sigmoidMidpoint, 512, "Specifies the centre point in mode 1") - ("SEIToneMapSigmoidWidth", m_sigmoidWidth, 960, "Specifies the distance between 5% and 95% values of the target_bit_depth in mode 1") - ("SEIToneMapStartOfCodedInterval", cfg_startOfCodedInterval, cfg_startOfCodedInterval, "Array of user-defined mapping table") - ("SEIToneMapNumPivots", m_numPivots, 0, "Specifies the number of pivot points in mode 3") - 
("SEIToneMapCodedPivotValue", cfg_codedPivotValue, cfg_codedPivotValue, "Array of pivot point") - ("SEIToneMapTargetPivotValue", cfg_targetPivotValue, cfg_targetPivotValue, "Array of pivot point") - ("SEIToneMapCameraIsoSpeedIdc", m_cameraIsoSpeedIdc, 0, "Indicates the camera ISO speed for daylight illumination") - ("SEIToneMapCameraIsoSpeedValue", m_cameraIsoSpeedValue, 400, "Specifies the camera ISO speed for daylight illumination of Extended_ISO") - ("SEIToneMapExposureIndexIdc", m_exposureIndexIdc, 0, "Indicates the exposure index setting of the camera") - ("SEIToneMapExposureIndexValue", m_exposureIndexValue, 400, "Specifies the exposure index setting of the camera of Extended_ISO") - ("SEIToneMapExposureCompensationValueSignFlag", m_exposureCompensationValueSignFlag, false, "Specifies the sign of ExposureCompensationValue") - ("SEIToneMapExposureCompensationValueNumerator", m_exposureCompensationValueNumerator, 0, "Specifies the numerator of ExposureCompensationValue") - ("SEIToneMapExposureCompensationValueDenomIdc", m_exposureCompensationValueDenomIdc, 2, "Specifies the denominator of ExposureCompensationValue") - ("SEIToneMapRefScreenLuminanceWhite", m_refScreenLuminanceWhite, 350, "Specifies reference screen brightness setting in units of candela per square metre") - ("SEIToneMapExtendedRangeWhiteLevel", m_extendedRangeWhiteLevel, 800, "Indicates the luminance dynamic range") - ("SEIToneMapNominalBlackLevelLumaCodeValue", m_nominalBlackLevelLumaCodeValue, 16, "Specifies luma sample value of the nominal black level assigned decoded pictures") - ("SEIToneMapNominalWhiteLevelLumaCodeValue", m_nominalWhiteLevelLumaCodeValue, 235, "Specifies luma sample value of the nominal white level assigned decoded pictures") - ("SEIToneMapExtendedWhiteLevelLumaCodeValue", m_extendedWhiteLevelLumaCodeValue, 300, "Specifies luma sample value of the extended dynamic range assigned decoded pictures") - ("SEIChromaResamplingFilterHint", m_chromaResamplingFilterSEIenabled, 
false, "Control generation of the chroma sampling filter hint SEI message") - ("SEIChromaResamplingHorizontalFilterType", m_chromaResamplingHorFilterIdc, 2, "Defines the Index of the chroma sampling horizontal filter\n" - "\t0: unspecified - Chroma filter is unknown or is determined by the application" - "\t1: User-defined - Filter coefficients are specified in the chroma sampling filter hint SEI message" - "\t2: Standards-defined - ITU-T Rec. T.800 | ISO/IEC15444-1, 5/3 filter") - ("SEIChromaResamplingVerticalFilterType", m_chromaResamplingVerFilterIdc, 2, "Defines the Index of the chroma sampling vertical filter\n" - "\t0: unspecified - Chroma filter is unknown or is determined by the application" - "\t1: User-defined - Filter coefficients are specified in the chroma sampling filter hint SEI message" - "\t2: Standards-defined - ITU-T Rec. T.800 | ISO/IEC15444-1, 5/3 filter") + ("SEIDecodingUnitInfo", m_decodingUnitInfoSEIEnabled, false, "Control generation of decoding unit information SEI message.") + ("SEIFrameFieldInfo", m_frameFieldInfoSEIEnabled, false, "Control generation of frame field information SEI messages") ("SEIFramePacking", m_framePackingSEIEnabled, false, "Control generation of frame packing SEI messages") ("SEIFramePackingType", m_framePackingSEIType, 0, "Define frame packing arrangement\n" "\t3: side by side - frames are displayed horizontally\n" @@ -1219,52 +1213,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) "\t0: unspecified\n" "\t1: stereo pair, frame0 represents left view\n" "\t2: stereo pair, frame0 represents right view") - ("SEISegmentedRectFramePacking", m_segmentedRectFramePackingSEIEnabled, false, "Controls generation of segmented rectangular frame packing SEI messages") - ("SEISegmentedRectFramePackingCancel", m_segmentedRectFramePackingSEICancel, false, "If equal to 1, cancels the persistence of any previous SRFPA SEI message") - ("SEISegmentedRectFramePackingType", m_segmentedRectFramePackingSEIType, 0, "Specifies the 
arrangement of the frames in the reconstructed picture") - ("SEISegmentedRectFramePackingPersistence", m_segmentedRectFramePackingSEIPersistence, false, "If equal to 0, the SEI applies to the current frame only") - ("SEIDisplayOrientation", m_displayOrientationSEIAngle, 0, "Control generation of display orientation SEI messages\n" - "\tN: 0 < N < (2^16 - 1) enable display orientation SEI message with anticlockwise_rotation = N and display_orientation_repetition_period = 1\n" - "\t0: disable") - ("SEITemporalLevel0Index", m_temporalLevel0IndexSEIEnabled, false, "Control generation of temporal level 0 index SEI messages") - ("SEIGradualDecodingRefreshInfo", m_gradualDecodingRefreshInfoEnabled, false, "Control generation of gradual decoding refresh information SEI message") - ("SEINoDisplay", m_noDisplaySEITLayer, 0, "Control generation of no display SEI message\n" - "\tN: 0 < N enable no display SEI message for temporal layer N or higher\n" - "\t0: disable") - ("SEIDecodingUnitInfo", m_decodingUnitInfoSEIEnabled, false, "Control generation of decoding unit information SEI message.") - ("SEISOPDescription", m_SOPDescriptionSEIEnabled, false, "Control generation of SOP description SEI messages") - ("SEIScalableNesting", m_scalableNestingSEIEnabled, false, "Control generation of scalable nesting SEI messages") -#if HEVC_TILES_WPP - ("SEITempMotionConstrainedTileSets", m_tmctsSEIEnabled, false, "Control generation of temporal motion constrained tile sets SEI message") -#endif - ("SEITimeCodeEnabled", m_timeCodeSEIEnabled, false, "Control generation of time code information SEI message") - ("SEITimeCodeNumClockTs", m_timeCodeSEINumTs, 0, "Number of clock time sets [0..3]") - ("SEITimeCodeTimeStampFlag", cfg_timeCodeSeiTimeStampFlag, cfg_timeCodeSeiTimeStampFlag, "Time stamp flag associated to each time set") - ("SEITimeCodeFieldBasedFlag", cfg_timeCodeSeiNumUnitFieldBasedFlag, cfg_timeCodeSeiNumUnitFieldBasedFlag, "Field based flag associated to each time set") - 
("SEITimeCodeCountingType", cfg_timeCodeSeiCountingType, cfg_timeCodeSeiCountingType, "Counting type associated to each time set") - ("SEITimeCodeFullTsFlag", cfg_timeCodeSeiFullTimeStampFlag, cfg_timeCodeSeiFullTimeStampFlag, "Full time stamp flag associated to each time set") - ("SEITimeCodeDiscontinuityFlag", cfg_timeCodeSeiDiscontinuityFlag, cfg_timeCodeSeiDiscontinuityFlag, "Discontinuity flag associated to each time set") - ("SEITimeCodeCntDroppedFlag", cfg_timeCodeSeiCntDroppedFlag, cfg_timeCodeSeiCntDroppedFlag, "Counter dropped flag associated to each time set") - ("SEITimeCodeNumFrames", cfg_timeCodeSeiNumberOfFrames, cfg_timeCodeSeiNumberOfFrames, "Number of frames associated to each time set") - ("SEITimeCodeSecondsValue", cfg_timeCodeSeiSecondsValue, cfg_timeCodeSeiSecondsValue, "Seconds value for each time set") - ("SEITimeCodeMinutesValue", cfg_timeCodeSeiMinutesValue, cfg_timeCodeSeiMinutesValue, "Minutes value for each time set") - ("SEITimeCodeHoursValue", cfg_timeCodeSeiHoursValue, cfg_timeCodeSeiHoursValue, "Hours value for each time set") - ("SEITimeCodeSecondsFlag", cfg_timeCodeSeiSecondsFlag, cfg_timeCodeSeiSecondsFlag, "Flag to signal seconds value presence in each time set") - ("SEITimeCodeMinutesFlag", cfg_timeCodeSeiMinutesFlag, cfg_timeCodeSeiMinutesFlag, "Flag to signal minutes value presence in each time set") - ("SEITimeCodeHoursFlag", cfg_timeCodeSeiHoursFlag, cfg_timeCodeSeiHoursFlag, "Flag to signal hours value presence in each time set") - ("SEITimeCodeOffsetLength", cfg_timeCodeSeiTimeOffsetLength, cfg_timeCodeSeiTimeOffsetLength, "Time offset length associated to each time set") - ("SEITimeCodeTimeOffset", cfg_timeCodeSeiTimeOffsetValue, cfg_timeCodeSeiTimeOffsetValue, "Time offset associated to each time set") - ("SEIKneeFunctionInfo", m_kneeSEIEnabled, false, "Control generation of Knee function SEI messages") - ("SEIKneeFunctionId", m_kneeSEIId, 0, "Specifies Id of Knee function SEI message for a given session") - 
("SEIKneeFunctionCancelFlag", m_kneeSEICancelFlag, false, "Indicates that Knee function SEI message cancels the persistence or follows") - ("SEIKneeFunctionPersistenceFlag", m_kneeSEIPersistenceFlag, true, "Specifies the persistence of the Knee function SEI message") - ("SEIKneeFunctionInputDrange", m_kneeSEIInputDrange, 1000, "Specifies the peak luminance level for the input picture of Knee function SEI messages") - ("SEIKneeFunctionInputDispLuminance", m_kneeSEIInputDispLuminance, 100, "Specifies the expected display brightness for the input picture of Knee function SEI messages") - ("SEIKneeFunctionOutputDrange", m_kneeSEIOutputDrange, 4000, "Specifies the peak luminance level for the output picture of Knee function SEI messages") - ("SEIKneeFunctionOutputDispLuminance", m_kneeSEIOutputDispLuminance, 800, "Specifies the expected display brightness for the output picture of Knee function SEI messages") - ("SEIKneeFunctionNumKneePointsMinus1", m_kneeSEINumKneePointsMinus1, 2, "Specifies the number of knee points - 1") - ("SEIKneeFunctionInputKneePointValue", cfg_kneeSEIInputKneePointValue, cfg_kneeSEIInputKneePointValue, "Array of input knee point") - ("SEIKneeFunctionOutputKneePointValue", cfg_kneeSEIOutputKneePointValue, cfg_kneeSEIOutputKneePointValue, "Array of output knee point") + ("SEIMasteringDisplayColourVolume", m_masteringDisplay.colourVolumeSEIEnabled, false, "Control generation of mastering display colour volume SEI messages") ("SEIMasteringDisplayMaxLuminance", m_masteringDisplay.maxLuminance, 10000u, "Specifies the mastering display maximum luminance value in units of 1/10000 candela per square metre (32-bit code value)") ("SEIMasteringDisplayMinLuminance", m_masteringDisplay.minLuminance, 0u, "Specifies the mastering display minimum luminance value in units of 1/10000 candela per square metre (32-bit code value)") @@ -1273,14 +1222,124 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI 
("SEIPreferredTransferCharacterisics", m_preferredTransferCharacteristics, -1, "Value for the preferred_transfer_characteristics field of the Alternative transfer characteristics SEI which will override the corresponding entry in the VUI. If negative, do not produce the respective SEI message") #endif - ("SEIGreenMetadataType", m_greenMetadataType, 0u, "Value for the green_metadata_type specifies the type of metadata that is present in the SEI message. If green_metadata_type is 1, then metadata enabling quality recovery after low-power encoding is present") - ("SEIXSDMetricType", m_xsdMetricType, 0u, "Value for the xsd_metric_type indicates the type of the objective quality metric. PSNR is the only type currently supported") + + ("SEIErpEnabled", m_erpSEIEnabled, false, "Control generation of equirectangular projection SEI messages") + ("SEIErpCancelFlag", m_erpSEICancelFlag, true, "Indicate that equirectangular projection SEI message cancels the persistence or follows") + ("SEIErpPersistenceFlag", m_erpSEIPersistenceFlag, false, "Specifies the persistence of the equirectangular projection SEI messages") + ("SEIErpGuardBandFlag", m_erpSEIGuardBandFlag, false, "Indicate the existence of guard band areas in the constituent picture") + ("SEIErpGuardBandType", m_erpSEIGuardBandType, 0u, "Indicate the type of the guard band") + ("SEIErpLeftGuardBandWidth", m_erpSEILeftGuardBandWidth, 0u, "Indicate the width of the guard band on the left side of the constituent picture") + ("SEIErpRightGuardBandWidth", m_erpSEIRightGuardBandWidth, 0u, "Indicate the width of the guard band on the right side of the constituent picture") + ("SEISphereRotationEnabled", m_sphereRotationSEIEnabled, false, "Control generation of sphere rotation SEI messages") + ("SEISphereRotationCancelFlag", m_sphereRotationSEICancelFlag, true, "Indicate that sphere rotation SEI message cancels the persistence or follows") + ("SEISphereRotationPersistenceFlag", m_sphereRotationSEIPersistenceFlag, false, 
"Specifies the persistence of the sphere rotation SEI messages") + ("SEISphereRotationYaw", m_sphereRotationSEIYaw, 0, "Specifies the value of the yaw rotation angle") + ("SEISphereRotationPitch", m_sphereRotationSEIPitch, 0, "Specifies the value of the pitch rotation angle") + ("SEISphereRotationRoll", m_sphereRotationSEIRoll, 0, "Specifies the value of the roll rotation angle") + ("SEIOmniViewportEnabled", m_omniViewportSEIEnabled, false, "Control generation of omni viewport SEI messages") + ("SEIOmniViewportId", m_omniViewportSEIId, 0u, "An identifying number that may be used to identify the purpose of the one or more recommended viewport regions") + ("SEIOmniViewportCancelFlag", m_omniViewportSEICancelFlag, true, "Indicate that omni viewport SEI message cancels the persistence or follows") + ("SEIOmniViewportPersistenceFlag", m_omniViewportSEIPersistenceFlag, false, "Specifies the persistence of the omni viewport SEI messages") + ("SEIOmniViewportCntMinus1", m_omniViewportSEICntMinus1, 0u, "specifies the number of recommended viewport regions minus 1") + ("SEIOmniViewportAzimuthCentre", cfg_omniViewportSEIAzimuthCentre, cfg_omniViewportSEIAzimuthCentre, "Indicate the centre of the i-th recommended viewport region") + ("SEIOmniViewportElevationCentre", cfg_omniViewportSEIElevationCentre, cfg_omniViewportSEIElevationCentre, "Indicate the centre of the i-th recommended viewport region") + ("SEIOmniViewportTiltCentre", cfg_omniViewportSEITiltCentre, cfg_omniViewportSEITiltCentre, "Indicates the tilt angle of the i-th recommended viewport region") + ("SEIOmniViewportHorRange", cfg_omniViewportSEIHorRange, cfg_omniViewportSEIHorRange, "Indicates the azimuth range of the i-th recommended viewport region") + ("SEIOmniViewportVerRange", cfg_omniViewportSEIVerRange, cfg_omniViewportSEIVerRange, "Indicates the elevation range of the i-th recommended viewport region") + ("SEIRwpEnabled", m_rwpSEIEnabled, false, "Controls if region-wise packing SEI message enabled") + 
("SEIRwpCancelFlag", m_rwpSEIRwpCancelFlag, true, "Specifies the persistence of any previous region-wise packing SEI message in output order.") + ("SEIRwpPersistenceFlag", m_rwpSEIRwpPersistenceFlag, false, "Specifies the persistence of the region-wise packing SEI message for the current layer.") + ("SEIRwpConstituentPictureMatchingFlag", m_rwpSEIConstituentPictureMatchingFlag, false, "Specifies the information in the SEI message apply individually to each constituent picture or to the projected picture.") + ("SEIRwpNumPackedRegions", m_rwpSEINumPackedRegions, 0, "specifies the number of packed regions when constituent picture matching flag is equal to 0.") + ("SEIRwpProjPictureWidth", m_rwpSEIProjPictureWidth, 0, "Specifies the width of the projected picture.") + ("SEIRwpProjPictureHeight", m_rwpSEIProjPictureHeight, 0, "Specifies the height of the projected picture.") + ("SEIRwpPackedPictureWidth", m_rwpSEIPackedPictureWidth, 0, "specifies the width of the packed picture.") + ("SEIRwpPackedPictureHeight", m_rwpSEIPackedPictureHeight, 0, "Specifies the height of the packed picture.") + ("SEIRwpTransformType", cfg_rwpSEIRwpTransformType, cfg_rwpSEIRwpTransformType, "specifies the rotation and mirroring to be applied to the i-th packed region.") + ("SEIRwpGuardBandFlag", cfg_rwpSEIRwpGuardBandFlag, cfg_rwpSEIRwpGuardBandFlag, "specifies the existence of guard band in the i-th packed region.") + ("SEIRwpProjRegionWidth", cfg_rwpSEIProjRegionWidth, cfg_rwpSEIProjRegionWidth, "specifies the width of the i-th projected region.") + ("SEIRwpProjRegionHeight", cfg_rwpSEIProjRegionHeight, cfg_rwpSEIProjRegionHeight, "specifies the height of the i-th projected region.") + ("SEIRwpProjRegionTop", cfg_rwpSEIRwpSEIProjRegionTop, cfg_rwpSEIRwpSEIProjRegionTop, "specifies the top sample row of the i-th projected region.") + ("SEIRwpProjRegionLeft", cfg_rwpSEIProjRegionLeft, cfg_rwpSEIProjRegionLeft, "specifies the left-most sample column of the i-th projected region.") + 
("SEIRwpPackedRegionWidth", cfg_rwpSEIPackedRegionWidth, cfg_rwpSEIPackedRegionWidth, "specifies the width of the i-th packed region.") + ("SEIRwpPackedRegionHeight", cfg_rwpSEIPackedRegionHeight, cfg_rwpSEIPackedRegionHeight, "specifies the height of the i-th packed region.") + ("SEIRwpPackedRegionTop", cfg_rwpSEIPackedRegionTop, cfg_rwpSEIPackedRegionTop, "specifies the top luma sample row of the i-th packed region.") + ("SEIRwpPackedRegionLeft", cfg_rwpSEIPackedRegionLeft, cfg_rwpSEIPackedRegionLeft, "specifies the left-most luma sample column of the i-th packed region.") + ("SEIRwpLeftGuardBandWidth", cfg_rwpSEIRwpLeftGuardBandWidth, cfg_rwpSEIRwpLeftGuardBandWidth, "specifies the width of the guard band on the left side of the i-th packed region.") + ("SEIRwpRightGuardBandWidth", cfg_rwpSEIRwpRightGuardBandWidth, cfg_rwpSEIRwpRightGuardBandWidth, "specifies the width of the guard band on the right side of the i-th packed region.") + ("SEIRwpTopGuardBandHeight", cfg_rwpSEIRwpTopGuardBandHeight, cfg_rwpSEIRwpTopGuardBandHeight, "specifies the height of the guard band above the i-th packed region.") + ("SEIRwpBottomGuardBandHeight", cfg_rwpSEIRwpBottomGuardBandHeight, cfg_rwpSEIRwpBottomGuardBandHeight, "specifies the height of the guard band below the i-th packed region.") + ("SEIRwpGuardBandNotUsedForPredFlag", cfg_rwpSEIRwpGuardBandNotUsedForPredFlag, cfg_rwpSEIRwpGuardBandNotUsedForPredFlag, "Specifies if the guard bands is used in the inter prediction process.") + ("SEIRwpGuardBandType", cfg_rwpSEIRwpGuardBandType, cfg_rwpSEIRwpGuardBandType, "Specifies the type of the guard bands for the i-th packed region.") + ("SEIGcmpEnabled", m_gcmpSEIEnabled, false, "Control generation of generalized cubemap projection SEI messages") + ("SEIGcmpCancelFlag", m_gcmpSEICancelFlag, true, "Indicate that generalized cubemap projection SEI message cancels the persistence or follows") + ("SEIGcmpPersistenceFlag", m_gcmpSEIPersistenceFlag, false, "Specifies the persistence of 
the generalized cubemap projection SEI messages") + ("SEIGcmpPackingType", m_gcmpSEIPackingType, 0u, "Specifies the packing type") + ("SEIGcmpMappingFunctionType", m_gcmpSEIMappingFunctionType, 0u, "Specifies the mapping function used to adjust the sample locations of the cubemap projection") + ("SEIGcmpFaceIndex", cfg_gcmpSEIFaceIndex, cfg_gcmpSEIFaceIndex, "Specifies the face index for the i-th face") + ("SEIGcmpFaceRotation", cfg_gcmpSEIFaceRotation, cfg_gcmpSEIFaceRotation, "Specifies the rotation to be applied to the i-th face") + ("SEIGcmpFunctionCoeffU", cfg_gcmpSEIFunctionCoeffU, cfg_gcmpSEIFunctionCoeffU, "Specifies the coefficient used in the cubemap mapping function of the u-axis of the i-th face") + ("SEIGcmpFunctionUAffectedByVFlag", cfg_gcmpSEIFunctionUAffectedByVFlag, cfg_gcmpSEIFunctionUAffectedByVFlag, "Specifies whether the cubemap mapping function of the u-axis refers to the v position of the sample location") + ("SEIGcmpFunctionCoeffV", cfg_gcmpSEIFunctionCoeffV, cfg_gcmpSEIFunctionCoeffV, "Specifies the coefficient used in the cubemap mapping function of the v-axis of the i-th face") + ("SEIGcmpFunctionVAffectedByUFlag", cfg_gcmpSEIFunctionVAffectedByUFlag, cfg_gcmpSEIFunctionVAffectedByUFlag, "Specifies whether the cubemap mapping function of the v-axis refers to the u position of the sample location") + ("SEIGcmpGuardBandFlag", m_gcmpSEIGuardBandFlag, false, "Indicate the existence of guard band areas in the picture") + ("SEIGcmpGuardBandBoundaryType", m_gcmpSEIGuardBandBoundaryType, false, "Indicate which face boundaries contain guard bands") + ("SEIGcmpGuardBandSamplesMinus1", m_gcmpSEIGuardBandSamplesMinus1, 0u, "Specifies the number of guard band samples minus1 used in the cubemap projected picture") + ("SEISubpicureLevelInfo", m_subpicureLevelInfoSEIEnabled, false, "Control generation of Subpicture Level Information SEI messages") + ("SEISampleAspectRatioInfo", m_sampleAspectRatioInfoSEIEnabled, false, "Control generation of Sample 
Aspect Ratio Information SEI messages") + ("SEISARICancelFlag", m_sariCancelFlag, false, "Indicates that Sample Aspect Ratio Information SEI message cancels the persistence or follows") + ("SEISARIPersistenceFlag", m_sariPersistenceFlag, true, "Specifies the persistence of the Sample Aspect Ratio Information SEI message") + ("SEISARIAspectRatioIdc", m_sariAspectRatioIdc, 0, "Specifies the Sample Aspect Ratio IDC of Sample Aspect Ratio Information SEI messages") + ("SEISARISarWidth", m_sariSarWidth, 0, "Specifies the Sample Aspect Ratio Width of Sample Aspect Ratio Information SEI messages, if extended SAR is chosen.") + ("SEISARISarHeight", m_sariSarHeight, 0, "Specifies the Sample Aspect Ratio Height of Sample Aspect Ratio Information SEI messages, if extended SAR is chosen.") ("MCTSEncConstraint", m_MCTSEncConstraint, false, "For MCTS, constrain motion vectors at tile boundaries") #if ENABLE_TRACING ("TraceChannelsList", bTracingChannelsList, false, "List all available tracing channels") ("TraceRule", sTracingRule, string( "" ), "Tracing rule (ex: \"D_CABAC:poc==8\" or \"D_REC_CB_LUMA:poc==8\")") ("TraceFile", sTracingFile, string( "" ), "Tracing file") #endif +// film grain characteristics SEI + ("SEIFGCEnabled", m_fgcSEIEnabled, false, "Control generation of the film grain characteristics SEI message") + ("SEIFGCCancelFlag", m_fgcSEICancelFlag, true, "Specifies the persistence of any previous film grain characteristics SEI message in output order.") + ("SEIFGCPersistenceFlag", m_fgcSEIPersistenceFlag, false, "Specifies the persistence of the film grain characteristics SEI message for the current layer.") + ("SEIFGCModelID", m_fgcSEIModelID, 0u, "Specifies the film grain simulation model. 
0: frequency filtering; 1: auto-regression.") + ("SEIFGCSepColourDescPresentFlag", m_fgcSEISepColourDescPresentFlag, false, "Specifies the presence of a distinct colour space description for the film grain characteristics specified in the SEI message.") + ("SEIFGCBlendingModeID", m_fgcSEIBlendingModeID, 0u, "Specifies the blending mode used to blend the simulated film grain with the decoded images. 0: additive; 1: multiplicative.") + ("SEIFGCLog2ScaleFactor", m_fgcSEILog2ScaleFactor, 0u, "Specifies a scale factor used in the film grain characterization equations.") + ("SEIFGCCompModelPresentComp0", m_fgcSEICompModelPresent[0], false, "Specifies the presence of film grain modelling on colour component 0.") + ("SEIFGCCompModelPresentComp1", m_fgcSEICompModelPresent[1], false, "Specifies the presence of film grain modelling on colour component 1.") + ("SEIFGCCompModelPresentComp2", m_fgcSEICompModelPresent[2], false, "Specifies the presence of film grain modelling on colour component 2.") + +// content light level SEI + ("SEICLLEnabled", m_cllSEIEnabled, false, "Control generation of the content light level SEI message") + ("SEICLLMaxContentLightLevel", m_cllSEIMaxContentLevel, 0u, "When not equal to 0, specifies an upper bound on the maximum light level among all individual samples in a 4:4:4 representation " + "of red, green, and blue colour primary intensities in the linear light domain for the pictures of the CLVS, " + "in units of candelas per square metre.When equal to 0, no such upper bound is indicated.") + ("SEICLLMaxPicAvgLightLevel", m_cllSEIMaxPicAvgLevel, 0u, "When not equal to 0, specifies an upper bound on the maximum average light level among the samples in a 4:4:4 representation " + "of red, green, and blue colour primary intensities in the linear light domain for any individual picture of the CLVS, " + "in units of candelas per square metre.When equal to 0, no such upper bound is indicated.") +// ambient viewing environment SEI + ("SEIAVEEnabled", 
m_aveSEIEnabled, false, "Control generation of the ambient viewing environment SEI message") + ("SEIAVEAmbientIlluminance", m_aveSEIAmbientIlluminance, 100000u, "Specifies the environmental illluminance of the ambient viewing environment in units of 1/10000 lux for the ambient viewing environment SEI message") + ("SEIAVEAmbientLightX", m_aveSEIAmbientLightX, 15635u, "Specifies the normalized x chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message") + ("SEIAVEAmbientLightY", m_aveSEIAmbientLightY, 16450u, "Specifies the normalized y chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message") +// content colour volume SEI + ("SEICCVEnabled", m_ccvSEIEnabled, false, "Control generation of the Content Colour Volume SEI message") + ("SEICCVCancelFlag", m_ccvSEICancelFlag, true, "Specifies the persistence of any previous content colour volume SEI message in output order.") + ("SEICCVPersistenceFlag", m_ccvSEIPersistenceFlag, false, "Specifies the persistence of the content colour volume SEI message for the current layer.") + ("SEICCVPrimariesPresent", m_ccvSEIPrimariesPresentFlag, true, "Specifies whether the CCV primaries are present in the content colour volume SEI message.") + ("m_ccvSEIPrimariesX0", m_ccvSEIPrimariesX[0], 0.300, "Specifies the x coordinate of the first (green) primary for the content colour volume SEI message") + ("m_ccvSEIPrimariesY0", m_ccvSEIPrimariesY[0], 0.600, "Specifies the y coordinate of the first (green) primary for the content colour volume SEI message") + ("m_ccvSEIPrimariesX1", m_ccvSEIPrimariesX[1], 0.150, "Specifies the x coordinate of the second (blue) primary for the content colour volume SEI message") + ("m_ccvSEIPrimariesY1", 
m_ccvSEIPrimariesY[1], 0.060, "Specifies the y coordinate of the second (blue) primary for the content colour volume SEI message") + ("m_ccvSEIPrimariesX2", m_ccvSEIPrimariesX[2], 0.640, "Specifies the x coordinate of the third (red) primary for the content colour volume SEI message") + ("m_ccvSEIPrimariesY2", m_ccvSEIPrimariesY[2], 0.330, "Specifies the y coordinate of the third (red) primary for the content colour volume SEI message") + ("SEICCVMinLuminanceValuePresent", m_ccvSEIMinLuminanceValuePresentFlag, true, "Specifies whether the CCV min luminance value is present in the content colour volume SEI message") + ("SEICCVMinLuminanceValue", m_ccvSEIMinLuminanceValue, 0.0, "specifies the CCV min luminance value in the content colour volume SEI message") + ("SEICCVMaxLuminanceValuePresent", m_ccvSEIMaxLuminanceValuePresentFlag, true, "Specifies whether the CCV max luminance value is present in the content colour volume SEI message") + ("SEICCVMaxLuminanceValue", m_ccvSEIMaxLuminanceValue, 0.1, "specifies the CCV max luminance value in the content colour volume SEI message") + ("SEICCVAvgLuminanceValuePresent", m_ccvSEIAvgLuminanceValuePresentFlag, true, "Specifies whether the CCV avg luminance value is present in the content colour volume SEI message") + ("SEICCVAvgLuminanceValue", m_ccvSEIAvgLuminanceValue, 0.01, "specifies the CCV avg luminance value in the content colour volume SEI message") ("DebugBitstream", m_decodeBitstreams[0], string( "" ), "Assume the frames up to POC DebugPOC will be the same as in this bitstream. Load those frames from the bitstream instead of encoding them." ) ("DebugPOC", m_switchPOC, -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC, return to normal encoding." 
) @@ -1297,14 +1356,35 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("NumWppThreads", m_numWppThreads, 1, "Number of threads used to run WPP-style parallelization") ("NumWppExtraLines", m_numWppExtraLines, 0, "Number of additional wpp lines to switch when threads are blocked") ("DebugCTU", m_debugCTU, -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC-frame at CTUline containin debug CTU.") -#if ENABLE_WPP_PARALLELISM - ("EnsureWppBitEqual", m_ensureWppBitEqual, true, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off") -#else ("EnsureWppBitEqual", m_ensureWppBitEqual, false, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off") -#endif ( "ALF", m_alf, true, "Adpative Loop Filter\n" ) + ( "ScalingRatioHor", m_scalingRatioHor, 1.0, "Scaling ratio in hor direction" ) + ( "ScalingRatioVer", m_scalingRatioVer, 1.0, "Scaling ratio in ver direction" ) + ( "FractionNumFrames", m_fractionOfFrames, 1.0, "Encode a fraction of the specified in FramesToBeEncoded frames" ) + ( "SwitchPocPeriod", m_switchPocPeriod, 0, "Switch POC period for RPR" ) + ( "UpscaledOutput", m_upscaledOutput, 0, "Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR" ) + ( "MaxLayers", m_maxLayers, 1, "Max number of layers" ) + + ; + opts.addOptions() + ( "MaxSublayers", m_maxSublayers, 1, "Max number of Sublayers") + ( "AllLayersSameNumSublayersFlag", m_allLayersSameNumSublayersFlag, true, "All layers same num sublayersflag") + ( "AllIndependentLayersFlag", m_allIndependentLayersFlag, true, "All layers are independent layer") + ( "LayerId%d", m_layerId, 0, MAX_VPS_LAYERS, "Max number of Sublayers") + ( "NumRefLayers%d", m_numRefLayers, 0, MAX_VPS_LAYERS, "Number of direct reference layer index of i-th layer") + ( "RefLayerIdx%d", m_refLayerIdxStr, string(""), MAX_VPS_LAYERS, "Reference layer index(es)") + ( 
"EachLayerIsAnOlsFlag", m_eachLayerIsAnOlsFlag, true, "Each layer is an OLS layer flag") + ( "OlsModeIdc", m_olsModeIdc, 0, "Output layer set mode") + ( "NumOutputLayerSets", m_numOutputLayerSets, 1, "Number of output layer sets") + ( "OlsOutputLayer%d", m_olsOutputLayerStr, string(""), MAX_VPS_LAYERS, "Output layer index of i-th OLS") ; + opts.addOptions() + ("TemporalFilter", m_gopBasedTemporalFilterEnabled, false, "Enable GOP based temporal filter. Disabled per default") + ("TemporalFilterFutureReference", m_gopBasedTemporalFilterFutureReference, true, "Enable referencing of future frames in the GOP based temporal filter. This is typically disabled for Low Delay configurations.") + ("TemporalFilterStrengthFrame*", m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer." + " E.g. --TemporalFilterStrengthFrame8 0.95 will enable GOP based temporal filter at every 8th frame with strength 0.95"); + #if EXTENSION_360_VIDEO TExt360AppEncCfg::TExt360AppEncCfgContext ext360CfgContext; m_ext360.addOptions(opts, ext360CfgContext); @@ -1316,19 +1396,96 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) cOSS<<"Frame"<<i; opts.addOptions()(cOSS.str(), m_GOPList[i-1], GOPEntry()); } + po::setDefaults(opts); po::ErrorReporter err; const list<const char*>& argv_unhandled = po::scanArgv(opts, argc, (const char**) argv, err); + m_rprEnabled = m_scalingRatioHor != 1.0 || m_scalingRatioVer != 1.0; + if( m_fractionOfFrames != 1.0 ) + { + m_framesToBeEncoded = int( m_framesToBeEncoded * m_fractionOfFrames ); + } + + if( m_rprEnabled && !m_switchPocPeriod ) + { + m_switchPocPeriod = m_iFrameRate / 2 / m_iGOPSize * m_iGOPSize; + } + m_bpDeltasGOPStructure = false; + if(m_iGOPSize == 16) + { + if ((m_GOPList[0].m_POC == 16 && m_GOPList[0].m_temporalId == 0 ) + && (m_GOPList[1].m_POC == 8 && m_GOPList[1].m_temporalId == 1 ) + && (m_GOPList[2].m_POC == 4 && m_GOPList[2].m_temporalId == 2 ) + && 
(m_GOPList[3].m_POC == 2 && m_GOPList[3].m_temporalId == 3 ) + && (m_GOPList[4].m_POC == 1 && m_GOPList[4].m_temporalId == 4 ) + && (m_GOPList[5].m_POC == 3 && m_GOPList[5].m_temporalId == 4 ) + && (m_GOPList[6].m_POC == 6 && m_GOPList[6].m_temporalId == 3 ) + && (m_GOPList[7].m_POC == 5 && m_GOPList[7].m_temporalId == 4 ) + && (m_GOPList[8].m_POC == 7 && m_GOPList[8].m_temporalId == 4 ) + && (m_GOPList[9].m_POC == 12 && m_GOPList[9].m_temporalId == 2 ) + && (m_GOPList[10].m_POC == 10 && m_GOPList[10].m_temporalId == 3 ) + && (m_GOPList[11].m_POC == 9 && m_GOPList[11].m_temporalId == 4 ) + && (m_GOPList[12].m_POC == 11 && m_GOPList[12].m_temporalId == 4 ) + && (m_GOPList[13].m_POC == 14 && m_GOPList[13].m_temporalId == 3 ) + && (m_GOPList[14].m_POC == 13 && m_GOPList[14].m_temporalId == 4 ) + && (m_GOPList[15].m_POC == 15 && m_GOPList[15].m_temporalId == 4 )) + { + m_bpDeltasGOPStructure = true; + } + } + else if(m_iGOPSize == 8) + { + if ((m_GOPList[0].m_POC == 8 && m_GOPList[0].m_temporalId == 0 ) + && (m_GOPList[1].m_POC == 4 && m_GOPList[1].m_temporalId == 1 ) + && (m_GOPList[2].m_POC == 2 && m_GOPList[2].m_temporalId == 2 ) + && (m_GOPList[3].m_POC == 1 && m_GOPList[3].m_temporalId == 3 ) + && (m_GOPList[4].m_POC == 3 && m_GOPList[4].m_temporalId == 3 ) + && (m_GOPList[5].m_POC == 6 && m_GOPList[5].m_temporalId == 2 ) + && (m_GOPList[6].m_POC == 5 && m_GOPList[6].m_temporalId == 3 ) + && (m_GOPList[7].m_POC == 7 && m_GOPList[7].m_temporalId == 3 )) + { + m_bpDeltasGOPStructure = true; + } + } + else + { + m_bpDeltasGOPStructure = false; + } + for (int i = 0; m_GOPList[i].m_POC != -1 && i < MAX_GOP + 1; i++) + { + m_RPLList0[i].m_POC = m_RPLList1[i].m_POC = m_GOPList[i].m_POC; + m_RPLList0[i].m_temporalId = m_RPLList1[i].m_temporalId = m_GOPList[i].m_temporalId; + m_RPLList0[i].m_refPic = m_RPLList1[i].m_refPic = m_GOPList[i].m_refPic; + m_RPLList0[i].m_sliceType = m_RPLList1[i].m_sliceType = m_GOPList[i].m_sliceType; + m_RPLList0[i].m_isEncoded = 
m_RPLList1[i].m_isEncoded = m_GOPList[i].m_isEncoded; + + m_RPLList0[i].m_numRefPicsActive = m_GOPList[i].m_numRefPicsActive0; + m_RPLList1[i].m_numRefPicsActive = m_GOPList[i].m_numRefPicsActive1; + m_RPLList0[i].m_numRefPics = m_GOPList[i].m_numRefPics0; + m_RPLList1[i].m_numRefPics = m_GOPList[i].m_numRefPics1; + m_RPLList0[i].m_ltrp_in_slice_header_flag = m_GOPList[i].m_ltrp_in_slice_header_flag; + m_RPLList1[i].m_ltrp_in_slice_header_flag = m_GOPList[i].m_ltrp_in_slice_header_flag; + for (int j = 0; j < m_GOPList[i].m_numRefPics0; j++) + m_RPLList0[i].m_deltaRefPics[j] = m_GOPList[i].m_deltaRefPics0[j]; + for (int j = 0; j < m_GOPList[i].m_numRefPics1; j++) + m_RPLList1[i].m_deltaRefPics[j] = m_GOPList[i].m_deltaRefPics1[j]; + } + if (m_compositeRefEnabled) { for (int i = 0; i < m_iGOPSize; i++) { m_GOPList[i].m_POC *= 2; - m_GOPList[i].m_deltaRPS *= 2; - for (int j = 0; j < m_GOPList[i].m_numRefPics; j++) + m_RPLList0[i].m_POC *= 2; + m_RPLList1[i].m_POC *= 2; + for (int j = 0; j < m_RPLList0[i].m_numRefPics; j++) { - m_GOPList[i].m_referencePics[j] *= 2; + m_RPLList0[i].m_deltaRefPics[j] *= 2; + } + for (int j = 0; j < m_RPLList1[i].m_numRefPics; j++) + { + m_RPLList1[i].m_deltaRefPics[j] *= 2; } } } @@ -1371,6 +1528,12 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) inputPathPrefix += "/"; } m_inputFileName = inputPathPrefix + m_inputFileName; + + if( m_temporalSubsampleRatio < 1) + { + EXIT ( "Error: TemporalSubsampleRatio must be greater than 0" ); + } + m_framesToBeEncoded = ( m_framesToBeEncoded + m_temporalSubsampleRatio - 1 ) / m_temporalSubsampleRatio; m_adIntraLambdaModifier = cfg_adIntraLambdaModifier.values; if(m_isField) @@ -1382,57 +1545,89 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) //number of fields to encode m_framesToBeEncoded *= 2; } - -#if HEVC_TILES_WPP - if( !m_tileUniformSpacingFlag && m_numTileColumnsMinus1 > 0 ) + if ( m_subPicPresentFlag ) { - if (cfg_ColumnWidth.values.size() > m_numTileColumnsMinus1) - { - EXIT( 
"Error: The number of columns whose width are defined is larger than the allowed number of columns." ); - } - else if (cfg_ColumnWidth.values.size() < m_numTileColumnsMinus1) + CHECK( m_numSubPics > 255 || m_numSubPics < 1, "Number of subpicture must be within 1 to 255" ); + m_subPicCtuTopLeftX = cfg_subPicCtuTopLeftX.values; + m_subPicCtuTopLeftY = cfg_subPicCtuTopLeftY.values; + m_subPicWidth = cfg_subPicWidth.values; + m_subPicHeight = cfg_subPicHeight.values; + m_subPicTreatedAsPicFlag = cfg_subPicTreatedAsPicFlag.values; + m_loopFilterAcrossSubpicEnabledFlag = cfg_loopFilterAcrossSubpicEnabledFlag.values; + m_subPicId = cfg_subPicId.values; + for(int i = 0; i < m_numSubPics; i++) { - EXIT( "Error: The width of some columns is not defined." ); + CHECK(m_subPicCtuTopLeftX[i] + m_subPicWidth[i] > (m_iSourceWidth + m_uiCTUSize - 1) / m_uiCTUSize, "subpicture must not exceed picture boundary"); + CHECK(m_subPicCtuTopLeftY[i] + m_subPicHeight[i] > (m_iSourceHeight + m_uiCTUSize - 1) / m_uiCTUSize, "subpicture must not exceed picture boundary"); } - else + if (m_subPicIdPresentFlag) { - m_tileColumnWidth.resize(m_numTileColumnsMinus1); - for(uint32_t i=0; i<cfg_ColumnWidth.values.size(); i++) + if (m_subPicIdSignallingPresentFlag) { - m_tileColumnWidth[i]=cfg_ColumnWidth.values[i]; + CHECK( m_subPicIdLen > 16, "sibpic ID length must not exceed 16 bits" ); } } } - else + if( m_picPartitionFlag ) { - m_tileColumnWidth.clear(); - } + // store tile column widths + m_tileColumnWidth.resize(cfgTileColumnWidth.values.size()); + for(uint32_t i=0; i<cfgTileColumnWidth.values.size(); i++) + { + m_tileColumnWidth[i]=cfgTileColumnWidth.values[i]; + } - if( !m_tileUniformSpacingFlag && m_numTileRowsMinus1 > 0 ) - { - if (cfg_RowHeight.values.size() > m_numTileRowsMinus1) + // store tile row heights + m_tileRowHeight.resize(cfgTileRowHeight.values.size()); + for(uint32_t i=0; i<cfgTileRowHeight.values.size(); i++) { - EXIT( "Error: The number of rows whose height are defined is 
larger than the allowed number of rows." ); + m_tileRowHeight[i]=cfgTileRowHeight.values[i]; } - else if (cfg_RowHeight.values.size() < m_numTileRowsMinus1) + + // store rectangular slice positions + if( !m_rasterSliceFlag ) { - EXIT( "Error: The height of some rows is not defined." ); + m_rectSlicePos.resize(cfgRectSlicePos.values.size()); + for(uint32_t i=0; i<cfgRectSlicePos.values.size(); i++) + { + m_rectSlicePos[i]=cfgRectSlicePos.values[i]; + } } - else + + // store raster-scan slice sizes + else { - m_tileRowHeight.resize(m_numTileRowsMinus1); - for(uint32_t i=0; i<cfg_RowHeight.values.size(); i++) + m_rasterSliceSize.resize(cfgRasterSliceSize.values.size()); + for(uint32_t i=0; i<cfgRasterSliceSize.values.size(); i++) { - m_tileRowHeight[i]=cfg_RowHeight.values[i]; + m_rasterSliceSize[i]=cfgRasterSliceSize.values[i]; } } } - else + else { + m_tileColumnWidth.clear(); m_tileRowHeight.clear(); + m_rectSlicePos.clear(); + m_rasterSliceSize.clear(); + m_rectSliceFixedWidth = 0; + m_rectSliceFixedHeight = 0; } -#endif + m_numSubProfile = (uint8_t) cfg_SubProfile.values.size(); + m_subProfile.resize(m_numSubProfile); + for (uint8_t i = 0; i < m_numSubProfile; ++i) + { + m_subProfile[i] = cfg_SubProfile.values[i]; + } + if (m_subPicPartitionFlag) + { + m_singleSlicePerSubPicFlag = false; + } + else + { + m_singleSlicePerSubPicFlag = true; + } /* rules for input, output and internal bitdepths as per help text */ if (m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA ] == 0) { @@ -1446,10 +1641,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) { m_internalBitDepth [CHANNEL_TYPE_LUMA ] = m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA ]; } - if (m_internalBitDepth [CHANNEL_TYPE_CHROMA] == 0) - { m_internalBitDepth [CHANNEL_TYPE_CHROMA] = m_internalBitDepth [CHANNEL_TYPE_LUMA ]; - } if (m_inputBitDepth [CHANNEL_TYPE_CHROMA] == 0) { m_inputBitDepth [CHANNEL_TYPE_CHROMA] = m_inputBitDepth [CHANNEL_TYPE_LUMA ]; @@ -1478,22 +1670,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] 
) CHECK( tmpMotionEstimationSearchMethod < 0 || tmpMotionEstimationSearchMethod >= MESEARCH_NUMBER_OF_METHODS, "Error in cfg" ); m_motionEstimationSearchMethod=MESearchMethod(tmpMotionEstimationSearchMethod); - if (extendedProfile >= 1000 && extendedProfile <= 12316) + if (extendedProfile == ExtendedProfileName::AUTO) { - m_profile = Profile::MAINREXT; - if (m_bitDepthConstraint != 0 || tmpConstraintChromaFormat != 0) - { - EXIT( "Error: The bit depth and chroma format constraints are not used when an explicit RExt profile is specified"); - } - m_bitDepthConstraint = (extendedProfile%100); - m_intraConstraintFlag = ((extendedProfile%10000)>=2000); - m_onePictureOnlyConstraintFlag = (extendedProfile >= 10000); - switch ((extendedProfile/100)%10) + if (xAutoDetermineProfile()) { - case 0: tmpConstraintChromaFormat=400; break; - case 1: tmpConstraintChromaFormat=420; break; - case 2: tmpConstraintChromaFormat=422; break; - default: tmpConstraintChromaFormat=444; break; + EXIT( "Unable to determine profile from configured settings"); } } else @@ -1501,68 +1682,24 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) m_profile = Profile::Name(extendedProfile); } - if (m_profile == Profile::HIGHTHROUGHPUTREXT ) { + m_chromaFormatConstraint = (tmpConstraintChromaFormat == 0) ? m_chromaFormatIDC : numberToChromaFormat(tmpConstraintChromaFormat); if (m_bitDepthConstraint == 0) { - m_bitDepthConstraint = 16; - } - m_chromaFormatConstraint = (tmpConstraintChromaFormat == 0) ? CHROMA_444 : numberToChromaFormat(tmpConstraintChromaFormat); - } - else if (m_profile == Profile::MAINREXT) - { - if (m_bitDepthConstraint == 0 && tmpConstraintChromaFormat == 0) - { - // produce a valid combination, if possible. 
- const bool bUsingGeneralRExtTools = m_transformSkipRotationEnabledFlag || - m_transformSkipContextEnabledFlag || - m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT] || - m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT] || - !m_enableIntraReferenceSmoothing || - m_persistentRiceAdaptationEnabledFlag || - m_log2MaxTransformSkipBlockSize!=2; - const bool bUsingChromaQPAdjustment= m_cuChromaQpOffsetSubdiv >= 0; - const bool bUsingExtendedPrecision = m_extendedPrecisionProcessingFlag; - if (m_onePictureOnlyConstraintFlag) + if (m_profile == Profile::MAIN_10 || m_profile == Profile::MAIN_444_10) { - m_chromaFormatConstraint = CHROMA_444; - if (m_intraConstraintFlag != true) - { - EXIT( "Error: Intra constraint flag must be true when one_picture_only_constraint_flag is true"); - } - const int maxBitDepth = m_chromaFormatIDC==CHROMA_400 ? m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]); - m_bitDepthConstraint = maxBitDepth>8 ? 16:8; + m_bitDepthConstraint = 10; } - else + else // m_profile == Profile::NONE { - m_chromaFormatConstraint = NUM_CHROMA_FORMAT; - automaticallySelectRExtProfile(bUsingGeneralRExtTools, - bUsingChromaQPAdjustment, - bUsingExtendedPrecision, - m_intraConstraintFlag, - m_bitDepthConstraint, - m_chromaFormatConstraint, - m_chromaFormatIDC==CHROMA_400 ? m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]), - m_chromaFormatIDC); + m_bitDepthConstraint = 8+15; // max value - unconstrained. } } - else if (m_bitDepthConstraint == 0 || tmpConstraintChromaFormat == 0) - { - EXIT( "Error: The bit depth and chroma format constraints must either both be specified or both be configured automatically"); - } - else - { - m_chromaFormatConstraint = numberToChromaFormat(tmpConstraintChromaFormat); - } - } - else - { - m_chromaFormatConstraint = (tmpConstraintChromaFormat == 0) ? 
m_chromaFormatIDC : numberToChromaFormat(tmpConstraintChromaFormat); - m_bitDepthConstraint = ( ( m_profile == Profile::MAIN10 || m_profile == Profile::NEXT ) ? 10 : 8 ); } m_inputColourSpaceConvert = stringToInputColourSpaceConvert(inputColourSpaceConvert, true); + m_rgbFormat = (m_inputColourSpaceConvert == IPCOLOURSPACE_RGBtoGBR && m_chromaFormatIDC == CHROMA_444) ? true : false; switch (m_conformanceWindowMode) { @@ -1627,19 +1764,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) } } - if (tmpSliceMode<0 || tmpSliceMode>=int(NUMBER_OF_SLICE_CONSTRAINT_MODES)) - { - EXIT( "Error: bad slice mode"); - } - m_sliceMode = SliceConstraint(tmpSliceMode); - -#if HEVC_DEPENDENT_SLICES - if (tmpSliceSegmentMode<0 || tmpSliceSegmentMode>=int(NUMBER_OF_SLICE_CONSTRAINT_MODES)) - { - EXIT( "Error: bad slice segment mode"); - } - m_sliceSegmentMode = SliceConstraint(tmpSliceSegmentMode); -#endif if (tmpDecodedPictureHashSEIMappedType<0 || tmpDecodedPictureHashSEIMappedType>=int(NUMBER_OF_HASHTYPES)) { @@ -1726,12 +1850,70 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) } #endif + CHECK(cfg_qpInValCb.values.size() != cfg_qpOutValCb.values.size(), "Chroma QP table for Cb is incomplete."); + CHECK(cfg_qpInValCr.values.size() != cfg_qpOutValCr.values.size(), "Chroma QP table for Cr is incomplete."); + CHECK(cfg_qpInValCbCr.values.size() != cfg_qpOutValCbCr.values.size(), "Chroma QP table for CbCr is incomplete."); + if (m_useIdentityTableForNon420Chroma && m_chromaFormatIDC != CHROMA_420) + { + m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag = true; + cfg_qpInValCb.values = { 0 }; + cfg_qpInValCr.values = { 0 }; + cfg_qpInValCbCr.values = { 0 }; + cfg_qpOutValCb.values = { 0 }; + cfg_qpOutValCr.values = { 0 }; + cfg_qpOutValCbCr.values = { 0 }; + } + int qpBdOffsetC = 6 * (m_internalBitDepth[CHANNEL_TYPE_CHROMA] - 8); + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[0].resize(cfg_qpInValCb.values.size()); + 
m_chromaQpMappingTableParams.m_deltaQpOutVal[0].resize(cfg_qpOutValCb.values.size()); + m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[0] = (cfg_qpOutValCb.values.size() > 1) ? (int)cfg_qpOutValCb.values.size() - 2 : 0; + m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] = (cfg_qpOutValCb.values.size() > 1) ? -26 + cfg_qpInValCb.values[0] : 0; + CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] > 36, "qpTableStartMinus26[0] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.") + CHECK(cfg_qpInValCb.values[0] != cfg_qpOutValCb.values[0], "First qpInValCb value should be equal to first qpOutValCb value"); + for (int i = 0; i < cfg_qpInValCb.values.size() - 1; i++) + { + CHECK(cfg_qpInValCb.values[i] < -qpBdOffsetC || cfg_qpInValCb.values[i] > MAX_QP, "Some entries cfg_qpInValCb are out of valid range of -qpBdOffsetC to 63, inclusive."); + CHECK(cfg_qpOutValCb.values[i] < -qpBdOffsetC || cfg_qpOutValCb.values[i] > MAX_QP, "Some entries cfg_qpOutValCb are out of valid range of -qpBdOffsetC to 63, inclusive."); + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[0][i] = cfg_qpInValCb.values[i + 1] - cfg_qpInValCb.values[i] - 1; + m_chromaQpMappingTableParams.m_deltaQpOutVal[0][i] = cfg_qpOutValCb.values[i + 1] - cfg_qpOutValCb.values[i]; + } + if (!m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag) + { + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[1].resize(cfg_qpInValCr.values.size()); + m_chromaQpMappingTableParams.m_deltaQpOutVal[1].resize(cfg_qpOutValCr.values.size()); + m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[1] = (cfg_qpOutValCr.values.size() > 1) ? (int)cfg_qpOutValCr.values.size() - 2 : 0; + m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] = (cfg_qpOutValCr.values.size() > 1) ? 
-26 + cfg_qpInValCr.values[0] : 0; + CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] > 36, "qpTableStartMinus26[1] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.") + CHECK(cfg_qpInValCr.values[0] != cfg_qpOutValCr.values[0], "First qpInValCr value should be equal to first qpOutValCr value"); + for (int i = 0; i < cfg_qpInValCr.values.size() - 1; i++) + { + CHECK(cfg_qpInValCr.values[i] < -qpBdOffsetC || cfg_qpInValCr.values[i] > MAX_QP, "Some entries cfg_qpInValCr are out of valid range of -qpBdOffsetC to 63, inclusive."); + CHECK(cfg_qpOutValCr.values[i] < -qpBdOffsetC || cfg_qpOutValCr.values[i] > MAX_QP, "Some entries cfg_qpOutValCr are out of valid range of -qpBdOffsetC to 63, inclusive."); + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[1][i] = cfg_qpInValCr.values[i + 1] - cfg_qpInValCr.values[i] - 1; + m_chromaQpMappingTableParams.m_deltaQpOutVal[1][i] = cfg_qpOutValCr.values[i + 1] - cfg_qpOutValCr.values[i]; + } + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[2].resize(cfg_qpInValCbCr.values.size()); + m_chromaQpMappingTableParams.m_deltaQpOutVal[2].resize(cfg_qpOutValCbCr.values.size()); + m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[2] = (cfg_qpOutValCbCr.values.size() > 1) ? (int)cfg_qpOutValCbCr.values.size() - 2 : 0; + m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] = (cfg_qpOutValCbCr.values.size() > 1) ? 
-26 + cfg_qpInValCbCr.values[0] : 0; + CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] > 36, "qpTableStartMinus26[2] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.") + /* the In and Out pivot lists must begin at the same QP, as checked for Cb and Cr */ CHECK(cfg_qpInValCbCr.values[0] != cfg_qpOutValCbCr.values[0], "First qpInValCbCr value should be equal to first qpOutValCbCr value"); + for (int i = 0; i < cfg_qpInValCbCr.values.size() - 1; i++) + { + CHECK(cfg_qpInValCbCr.values[i] < -qpBdOffsetC || cfg_qpInValCbCr.values[i] > MAX_QP, "Some entries cfg_qpInValCbCr are out of valid range of -qpBdOffsetC to 63, inclusive."); + CHECK(cfg_qpOutValCbCr.values[i] < -qpBdOffsetC || cfg_qpOutValCbCr.values[i] > MAX_QP, "Some entries cfg_qpOutValCbCr are out of valid range of -qpBdOffsetC to 63, inclusive."); + m_chromaQpMappingTableParams.m_deltaQpInValMinus1[2][i] = cfg_qpInValCbCr.values[i + 1] - cfg_qpInValCbCr.values[i] - 1; + m_chromaQpMappingTableParams.m_deltaQpOutVal[2][i] = cfg_qpOutValCbCr.values[i + 1] - cfg_qpOutValCbCr.values[i]; /* output deltas are taken from the Out list, matching the Cb and Cr branches */ + } + } + #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET if ( m_LadfEnabed ) { CHECK( m_LadfNumIntervals != cfg_LadfQpOffset.values.size(), "size of LadfQpOffset must be equal to LadfNumIntervals"); CHECK( m_LadfNumIntervals - 1 != cfg_LadfIntervalLowerBound.values.size(), "size of LadfIntervalLowerBound must be equal to LadfNumIntervals - 1"); m_LadfQpOffset = cfg_LadfQpOffset.values; + m_LadfIntervalLowerBound[0] = 0; for (int k = 1; k < m_LadfNumIntervals; k++) { m_LadfIntervalLowerBound[k] = cfg_LadfIntervalLowerBound.values[k - 1]; @@ -1739,6 +1921,56 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) } #endif + +#if JVET_O0756_CONFIG_HDRMETRICS && !JVET_O0756_CALCULATE_HDRMETRICS + if ( m_calculateHdrMetrics == true) + { + printf ("Warning: Configuration enables HDR metric calculations. 
However, HDR metric support was not linked when compiling the VTM.\n"); + m_calculateHdrMetrics = false; + } +#endif + + if ( m_loopFilterAcrossVirtualBoundariesDisabledFlag ) + { + CHECK( m_numVerVirtualBoundaries > 3, "Number of vertical virtual boundaries must be comprised between 0 and 3 included" ); + CHECK( m_numHorVirtualBoundaries > 3, "Number of horizontal virtual boundaries must be comprised between 0 and 3 included" ); + CHECK( m_numVerVirtualBoundaries != cfg_virtualBoundariesPosX.values.size(), "Size of VirtualBoundariesPosX must be equal to NumVerVirtualBoundaries"); + CHECK( m_numHorVirtualBoundaries != cfg_virtualBoundariesPosY.values.size(), "Size of VirtualBoundariesPosY must be equal to NumHorVirtualBoundaries"); + m_virtualBoundariesPosX = cfg_virtualBoundariesPosX.values; + if (m_numVerVirtualBoundaries > 1) + { + sort(m_virtualBoundariesPosX.begin(), m_virtualBoundariesPosX.end()); + } + for (unsigned i = 0; i < m_numVerVirtualBoundaries; i++) + { + CHECK( m_virtualBoundariesPosX[i] == 0 || m_virtualBoundariesPosX[i] >= m_iSourceWidth, "The vertical virtual boundary must be within the picture" ); + CHECK( m_virtualBoundariesPosX[i] % 8, "The vertical virtual boundary must be a multiple of 8 luma samples" ); + if (i > 0) + { + CHECK( m_virtualBoundariesPosX[i] - m_virtualBoundariesPosX[i-1] < m_uiCTUSize, "The distance between any two vertical virtual boundaries shall be greater than or equal to the CTU size" ); + } + } + m_virtualBoundariesPosY = cfg_virtualBoundariesPosY.values; + if (m_numHorVirtualBoundaries > 1) + { + sort(m_virtualBoundariesPosY.begin(), m_virtualBoundariesPosY.end()); + } + for (unsigned i = 0; i < m_numHorVirtualBoundaries; i++) + { + CHECK( m_virtualBoundariesPosY[i] == 0 || m_virtualBoundariesPosY[i] >= m_iSourceHeight, "The horizontal virtual boundary must be within the picture" ); + CHECK( m_virtualBoundariesPosY[i] % 8, "The horizontal virtual boundary must be a multiple of 8 luma samples" ); + if (i > 0) + { + 
CHECK( m_virtualBoundariesPosY[i] - m_virtualBoundariesPosY[i-1] < m_uiCTUSize, "The distance between any two horizontal virtual boundaries shall be greater than or equal to the CTU size" ); + } + } + } + + if ( m_alf ) + { + CHECK( m_maxNumAlfAlternativesChroma < 1 || m_maxNumAlfAlternativesChroma > MAX_NUM_ALF_ALTERNATIVES_CHROMA, std::string("The maximum number of ALF Chroma filter alternatives must be in the range (1-") + std::to_string(MAX_NUM_ALF_ALTERNATIVES_CHROMA) + std::string (", inclusive)") ); + } + // reading external dQP description from file if ( !m_dQPFileName.empty() ) { @@ -1771,82 +2003,122 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) m_masteringDisplay.whitePoint[idx] = uint16_t((cfg_DisplayWhitePointCode.values.size() > idx) ? cfg_DisplayWhitePointCode.values[idx] : 0); } } - - if( m_toneMappingInfoSEIEnabled && !m_toneMapCancelFlag ) - { - if( m_toneMapModelId == 2 && !cfg_startOfCodedInterval.values.empty() ) - { - const uint32_t num = 1u<< m_toneMapTargetBitDepth; - m_startOfCodedInterval = new int[num]; - for(uint32_t i=0; i<num; i++) - { - m_startOfCodedInterval[i] = cfg_startOfCodedInterval.values.size() > i ? 
cfg_startOfCodedInterval.values[i] : 0; - } - } - else - { - m_startOfCodedInterval = NULL; - } - if( ( m_toneMapModelId == 3 ) && ( m_numPivots > 0 ) ) - { - if( !cfg_codedPivotValue.values.empty() && !cfg_targetPivotValue.values.empty() ) + if ( m_omniViewportSEIEnabled && !m_omniViewportSEICancelFlag ) + { + /* the condition below accepts 0..15; the message states the same bounds */ CHECK (!( m_omniViewportSEICntMinus1 >= 0 && m_omniViewportSEICntMinus1 < 16 ), "SEIOmniViewportCntMinus1 must be in the range of 0 to 15"); + m_omniViewportSEIAzimuthCentre.resize (m_omniViewportSEICntMinus1+1); + m_omniViewportSEIElevationCentre.resize(m_omniViewportSEICntMinus1+1); + m_omniViewportSEITiltCentre.resize (m_omniViewportSEICntMinus1+1); + m_omniViewportSEIHorRange.resize (m_omniViewportSEICntMinus1+1); + m_omniViewportSEIVerRange.resize (m_omniViewportSEICntMinus1+1); + for(int i=0; i<(m_omniViewportSEICntMinus1+1); i++) + { + m_omniViewportSEIAzimuthCentre[i] = cfg_omniViewportSEIAzimuthCentre .values.size() > i ? cfg_omniViewportSEIAzimuthCentre .values[i] : 0; + m_omniViewportSEIElevationCentre[i] = cfg_omniViewportSEIElevationCentre.values.size() > i ? cfg_omniViewportSEIElevationCentre.values[i] : 0; + m_omniViewportSEITiltCentre[i] = cfg_omniViewportSEITiltCentre .values.size() > i ? cfg_omniViewportSEITiltCentre .values[i] : 0; + m_omniViewportSEIHorRange[i] = cfg_omniViewportSEIHorRange .values.size() > i ? cfg_omniViewportSEIHorRange .values[i] : 0; + m_omniViewportSEIVerRange[i] = cfg_omniViewportSEIVerRange .values.size() > i ? 
cfg_omniViewportSEIVerRange .values[i] : 0; + } + } + + if(!m_rwpSEIRwpCancelFlag && m_rwpSEIEnabled) + { + CHECK (!( m_rwpSEINumPackedRegions > 0 && m_rwpSEINumPackedRegions <= std::numeric_limits<uint8_t>::max() ), "SEIRwpNumPackedRegions must be in the range of 1 to 255"); + CHECK (!(cfg_rwpSEIRwpTransformType.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpTransformType values be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIRwpGuardBandFlag.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpGuardBandFlag values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIProjRegionWidth.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionWidth values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIProjRegionHeight.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionHeight values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIRwpSEIProjRegionTop.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpSEIProjRegionTop values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIProjRegionLeft.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionLeft values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIPackedRegionWidth.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionWidth values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIPackedRegionHeight.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionHeight values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIPackedRegionTop.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionTop values must be equal to SEIRwpNumPackedRegions"); + CHECK (!(cfg_rwpSEIPackedRegionLeft.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionLeft values must be equal to SEIRwpNumPackedRegions"); + + 
m_rwpSEIRwpTransformType.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpGuardBandFlag.resize(m_rwpSEINumPackedRegions); + m_rwpSEIProjRegionWidth.resize(m_rwpSEINumPackedRegions); + m_rwpSEIProjRegionHeight.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpSEIProjRegionTop.resize(m_rwpSEINumPackedRegions); + m_rwpSEIProjRegionLeft.resize(m_rwpSEINumPackedRegions); + m_rwpSEIPackedRegionWidth.resize(m_rwpSEINumPackedRegions); + m_rwpSEIPackedRegionHeight.resize(m_rwpSEINumPackedRegions); + m_rwpSEIPackedRegionTop.resize(m_rwpSEINumPackedRegions); + m_rwpSEIPackedRegionLeft.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpLeftGuardBandWidth.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpRightGuardBandWidth.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpTopGuardBandHeight.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpBottomGuardBandHeight.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpGuardBandNotUsedForPredFlag.resize(m_rwpSEINumPackedRegions); + m_rwpSEIRwpGuardBandType.resize(4*m_rwpSEINumPackedRegions); + for( int i=0; i < m_rwpSEINumPackedRegions; i++ ) + { + m_rwpSEIRwpTransformType[i] = cfg_rwpSEIRwpTransformType.values[i]; + CHECK (!( m_rwpSEIRwpTransformType[i] >= 0 && m_rwpSEIRwpTransformType[i] <= 7 ), "SEIRwpTransformType must be in the range of 0 to 7"); + m_rwpSEIRwpGuardBandFlag[i] = cfg_rwpSEIRwpGuardBandFlag.values[i]; + m_rwpSEIProjRegionWidth[i] = cfg_rwpSEIProjRegionWidth.values[i]; + m_rwpSEIProjRegionHeight[i] = cfg_rwpSEIProjRegionHeight.values[i]; + m_rwpSEIRwpSEIProjRegionTop[i] = cfg_rwpSEIRwpSEIProjRegionTop.values[i]; + m_rwpSEIProjRegionLeft[i] = cfg_rwpSEIProjRegionLeft.values[i]; + m_rwpSEIPackedRegionWidth[i] = cfg_rwpSEIPackedRegionWidth.values[i]; + m_rwpSEIPackedRegionHeight[i] = cfg_rwpSEIPackedRegionHeight.values[i]; + m_rwpSEIPackedRegionTop[i] = cfg_rwpSEIPackedRegionTop.values[i]; + m_rwpSEIPackedRegionLeft[i] = cfg_rwpSEIPackedRegionLeft.values[i]; + if( m_rwpSEIRwpGuardBandFlag[i] ) { - m_codedPivotValue = new int[m_numPivots]; - 
m_targetPivotValue = new int[m_numPivots]; - for(uint32_t i=0; i<m_numPivots; i++) + m_rwpSEIRwpLeftGuardBandWidth[i] = cfg_rwpSEIRwpLeftGuardBandWidth.values[i]; + m_rwpSEIRwpRightGuardBandWidth[i] = cfg_rwpSEIRwpRightGuardBandWidth.values[i]; + m_rwpSEIRwpTopGuardBandHeight[i] = cfg_rwpSEIRwpTopGuardBandHeight.values[i]; + m_rwpSEIRwpBottomGuardBandHeight[i] = cfg_rwpSEIRwpBottomGuardBandHeight.values[i]; + CHECK (! ( m_rwpSEIRwpLeftGuardBandWidth[i] > 0 || m_rwpSEIRwpRightGuardBandWidth[i] > 0 || m_rwpSEIRwpTopGuardBandHeight[i] >0 || m_rwpSEIRwpBottomGuardBandHeight[i] >0 ), "At least one of the RWP guard band parameters mut be greater than zero"); + m_rwpSEIRwpGuardBandNotUsedForPredFlag[i] = cfg_rwpSEIRwpGuardBandNotUsedForPredFlag.values[i]; + for( int j=0; j < 4; j++ ) { - m_codedPivotValue[i] = cfg_codedPivotValue.values.size() > i ? cfg_codedPivotValue.values [i] : 0; - m_targetPivotValue[i] = cfg_targetPivotValue.values.size() > i ? cfg_targetPivotValue.values[i] : 0; + m_rwpSEIRwpGuardBandType[i*4 + j] = cfg_rwpSEIRwpGuardBandType.values[i*4 + j]; } + } } - else - { - m_codedPivotValue = NULL; - m_targetPivotValue = NULL; - } } - - if( m_kneeSEIEnabled && !m_kneeSEICancelFlag ) + if (m_gcmpSEIEnabled && !m_gcmpSEICancelFlag) { - CHECK(!( m_kneeSEINumKneePointsMinus1 >= 0 && m_kneeSEINumKneePointsMinus1 < 999 ), "Inconsistent config"); - m_kneeSEIInputKneePoint = new int[m_kneeSEINumKneePointsMinus1+1]; - m_kneeSEIOutputKneePoint = new int[m_kneeSEINumKneePointsMinus1+1]; - for(int i=0; i<(m_kneeSEINumKneePointsMinus1+1); i++) + int numFace = m_gcmpSEIPackingType == 4 || m_gcmpSEIPackingType == 5 ? 
5 : 6; + CHECK (!(cfg_gcmpSEIFaceIndex.values.size() == numFace), "Number of SEIGcmpFaceIndex must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + CHECK (!(cfg_gcmpSEIFaceRotation.values.size() == numFace), "Number of SEIGcmpFaceRotation must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + m_gcmpSEIFaceIndex.resize(numFace); + m_gcmpSEIFaceRotation.resize(numFace); + if (m_gcmpSEIMappingFunctionType == 2) { - m_kneeSEIInputKneePoint[i] = cfg_kneeSEIInputKneePointValue.values.size() > i ? cfg_kneeSEIInputKneePointValue.values[i] : 1; - m_kneeSEIOutputKneePoint[i] = cfg_kneeSEIOutputKneePointValue.values.size() > i ? cfg_kneeSEIOutputKneePointValue.values[i] : 0; + CHECK (!(cfg_gcmpSEIFunctionCoeffU.values.size() == numFace), "Number of SEIGcmpFunctionCoeffU must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + CHECK (!(cfg_gcmpSEIFunctionUAffectedByVFlag.values.size() == numFace), "Number of SEIGcmpFunctionUAffectedByVFlag must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + CHECK (!(cfg_gcmpSEIFunctionCoeffV.values.size() == numFace), "Number of SEIGcmpFunctionCoeffV must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + CHECK (!(cfg_gcmpSEIFunctionVAffectedByUFlag.values.size() == numFace), "Number of SEIGcmpFunctionVAffectedByUFlag must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6"); + m_gcmpSEIFunctionCoeffU.resize(numFace); + m_gcmpSEIFunctionUAffectedByVFlag.resize(numFace); + m_gcmpSEIFunctionCoeffV.resize(numFace); + m_gcmpSEIFunctionVAffectedByUFlag.resize(numFace); } - } - - if(m_timeCodeSEIEnabled) - { - for(int i = 0; i < m_timeCodeSEINumTs && i < MAX_TIMECODE_SEI_SETS; i++) + for (int i = 0; i < numFace; i++) { - m_timeSetArray[i].clockTimeStampFlag = 
cfg_timeCodeSeiTimeStampFlag .values.size()>i ? cfg_timeCodeSeiTimeStampFlag .values [i] : false; - m_timeSetArray[i].numUnitFieldBasedFlag = cfg_timeCodeSeiNumUnitFieldBasedFlag.values.size()>i ? cfg_timeCodeSeiNumUnitFieldBasedFlag.values [i] : 0; - m_timeSetArray[i].countingType = cfg_timeCodeSeiCountingType .values.size()>i ? cfg_timeCodeSeiCountingType .values [i] : 0; - m_timeSetArray[i].fullTimeStampFlag = cfg_timeCodeSeiFullTimeStampFlag .values.size()>i ? cfg_timeCodeSeiFullTimeStampFlag .values [i] : 0; - m_timeSetArray[i].discontinuityFlag = cfg_timeCodeSeiDiscontinuityFlag .values.size()>i ? cfg_timeCodeSeiDiscontinuityFlag .values [i] : 0; - m_timeSetArray[i].cntDroppedFlag = cfg_timeCodeSeiCntDroppedFlag .values.size()>i ? cfg_timeCodeSeiCntDroppedFlag .values [i] : 0; - m_timeSetArray[i].numberOfFrames = cfg_timeCodeSeiNumberOfFrames .values.size()>i ? cfg_timeCodeSeiNumberOfFrames .values [i] : 0; - m_timeSetArray[i].secondsValue = cfg_timeCodeSeiSecondsValue .values.size()>i ? cfg_timeCodeSeiSecondsValue .values [i] : 0; - m_timeSetArray[i].minutesValue = cfg_timeCodeSeiMinutesValue .values.size()>i ? cfg_timeCodeSeiMinutesValue .values [i] : 0; - m_timeSetArray[i].hoursValue = cfg_timeCodeSeiHoursValue .values.size()>i ? cfg_timeCodeSeiHoursValue .values [i] : 0; - m_timeSetArray[i].secondsFlag = cfg_timeCodeSeiSecondsFlag .values.size()>i ? cfg_timeCodeSeiSecondsFlag .values [i] : 0; - m_timeSetArray[i].minutesFlag = cfg_timeCodeSeiMinutesFlag .values.size()>i ? cfg_timeCodeSeiMinutesFlag .values [i] : 0; - m_timeSetArray[i].hoursFlag = cfg_timeCodeSeiHoursFlag .values.size()>i ? cfg_timeCodeSeiHoursFlag .values [i] : 0; - m_timeSetArray[i].timeOffsetLength = cfg_timeCodeSeiTimeOffsetLength .values.size()>i ? cfg_timeCodeSeiTimeOffsetLength .values [i] : 0; - m_timeSetArray[i].timeOffsetValue = cfg_timeCodeSeiTimeOffsetValue .values.size()>i ? 
cfg_timeCodeSeiTimeOffsetValue .values [i] : 0; + m_gcmpSEIFaceIndex[i] = cfg_gcmpSEIFaceIndex.values[i]; + m_gcmpSEIFaceRotation[i] = cfg_gcmpSEIFaceRotation.values[i]; + if (m_gcmpSEIMappingFunctionType == 2) + { + m_gcmpSEIFunctionCoeffU[i] = cfg_gcmpSEIFunctionCoeffU.values[i]; + m_gcmpSEIFunctionUAffectedByVFlag[i] = cfg_gcmpSEIFunctionUAffectedByVFlag.values[i]; + m_gcmpSEIFunctionCoeffV[i] = cfg_gcmpSEIFunctionCoeffV.values[i]; + m_gcmpSEIFunctionVAffectedByUFlag[i] = cfg_gcmpSEIFunctionVAffectedByUFlag.values[i]; + } } } - m_reshapeCW.binCW.resize(3); m_reshapeCW.rspFps = m_iFrameRate; - m_reshapeCW.rspIntraPeriod = m_iIntraPeriod; m_reshapeCW.rspPicSize = m_iSourceWidth*m_iSourceHeight; m_reshapeCW.rspFpsToIp = std::max(16, 16 * (int)(round((double)m_iFrameRate /16.0))); m_reshapeCW.rspBaseQP = m_iQP; + m_reshapeCW.updateCtrl = m_updateCtrl; + m_reshapeCW.adpOption = m_adpOption; + m_reshapeCW.initialCW = m_initialCW; #if ENABLE_TRACING g_trace_ctx = tracing_init(sTracingFile, sTracingRule); if( bTracingChannelsList && g_trace_ctx ) @@ -1858,7 +2130,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif #if ENABLE_QPA - if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0)) + if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0 || m_cbCrQpOffsetDualTree != 0)) { msg( WARNING, "*************************************************************************\n" ); msg( WARNING, "* WARNING: chroma QPA on, ignoring nonzero dual-tree chroma QP offsets! 
*\n" ); @@ -1871,11 +2143,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #else if (((int)m_fQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_iSourceWidth <= 2048) && (m_iSourceHeight <= 1280) #endif -#if MAX_TB_SIZE_SIGNALLING + #if WCG_EXT && ER_CHROMA_QP_WCG_PPS + && (!m_wcgChromaQpControl.enabled) + #endif && ((1 << (m_log2MaxTbSize + 1)) == m_uiCTUSize) && (m_iSourceWidth > 512 || m_iSourceHeight > 320)) -#else - && ((1 << (MAX_TB_LOG2_SIZEY + 1)) == m_uiCTUSize) && (m_iSourceWidth > 512 || m_iSourceHeight > 320)) -#endif { m_cuQpDeltaSubdiv = 2; } @@ -1891,13 +2162,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) msg( WARNING, "*************************************************************************\n" ); m_uiCTUSize = m_uiMaxCUWidth; -#if MAX_TB_SIZE_SIGNALLING if( ( 1u << m_log2MaxTbSize ) > m_uiCTUSize ) m_log2MaxTbSize--; -#endif } #endif #endif // ENABLE_QPA +#if JVET_AHG14_LOSSLESS_ENC_QP_FIX + if( m_costMode == COST_LOSSLESS_CODING ) + { + m_iQP = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ( ( m_internalBitDepth[CHANNEL_TYPE_LUMA] - 8 ) * 6 ); + } +#endif + const int minCuSize = 1 << MIN_CU_LOG2; m_uiMaxCodingDepth = 0; while( ( m_uiCTUSize >> m_uiMaxCodingDepth ) > minCuSize ) @@ -1926,6 +2202,34 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) // Private member functions // ==================================================================================================================== +///< auto determine the profile to use given the other configuration settings. Returns 1 if erred. Can select profile 'NONE' + +int EncAppCfg::xAutoDetermineProfile() +{ + const int maxBitDepth= std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[m_chromaFormatIDC==ChromaFormat::CHROMA_400 ? 
CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA]); + m_profile=Profile::NONE; + + if (m_chromaFormatIDC==ChromaFormat::CHROMA_400 || m_chromaFormatIDC==ChromaFormat::CHROMA_420) + { + if (maxBitDepth<=10) + { + m_profile=Profile::MAIN_10; + } + } + else if (m_chromaFormatIDC==ChromaFormat::CHROMA_422 || m_chromaFormatIDC==ChromaFormat::CHROMA_444) + { + if (maxBitDepth<=10) + { + m_profile=Profile::MAIN_444_10; + } + } + else + { + return 1; // unknown chroma format + } + return 0; +} + bool EncAppCfg::xCheckParameter() { msg( NOTICE, "\n" ); @@ -1954,69 +2258,29 @@ bool EncAppCfg::xCheckParameter() #define xConfirmPara(a,b) check_failed |= confirmPara(a,b) - if( m_profile != Profile::NEXT ) + if( m_depQuantEnabledFlag ) { - THROW( "Next profile with an alternative partitioner has to be enabled if HEVC_USE_RQT is off!" ); -#if ENABLE_WPP_PARALLELISM - xConfirmPara( m_numWppThreads > 1, "WPP-style parallelization only supported with NEXT profile" ); -#endif - xConfirmPara( m_LMChroma, "LMChroma only allowed with NEXT profile" ); - xConfirmPara( m_ImvMode, "IMV is only allowed with NEXT profile" ); - xConfirmPara(m_IBCMode, "IBC Mode only allowed with NEXT profile"); - xConfirmPara( m_HashME, "Hash motion estimation only allowed with NEXT profile" ); - xConfirmPara( m_useFastLCTU, "Fast large CTU can only be applied when encoding with NEXT profile" ); - xConfirmPara( m_MTS, "MTS only allowed with NEXT profile" ); - xConfirmPara( m_MTSIntraMaxCand, "MTS only allowed with NEXT profile" ); - xConfirmPara( m_MTSInterMaxCand, "MTS only allowed with NEXT profile" ); - xConfirmPara( m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile" ); - xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" ); - xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" ); - xConfirmPara( m_Triangle, "Triangle is only allowed with NEXT profile" ); - xConfirmPara(m_DMVR, "DMVR only allowed with NEXT profile"); - // ADD_NEW_TOOL : (parameter check) 
add a check for next tools here - } - else - { - if( m_depQuantEnabledFlag ) - { - xConfirmPara( !m_useRDOQ || !m_useRDOQTS, "RDOQ and RDOQTS must be equal to 1 if dependent quantization is enabled" ); -#if HEVC_USE_SIGN_HIDING - xConfirmPara( m_signDataHidingEnabledFlag, "SignHideFlag must be equal to 0 if dependent quantization is enabled" ); -#endif - } - + xConfirmPara( !m_useRDOQ || !m_useRDOQTS, "RDOQ and RDOQTS must be equal to 1 if dependent quantization is enabled" ); + xConfirmPara( m_signDataHidingEnabledFlag, "SignHideFlag must be equal to 0 if dependent quantization is enabled" ); } if( m_wrapAround ) { - xConfirmPara( m_wrapAroundOffset == 0, "Wrap-around offset must be greater than 0" ); + xConfirmPara( m_wrapAroundOffset <= m_uiCTUSize + (m_uiMaxCUWidth >> m_uiLog2DiffMaxMinCodingBlockSize), "Wrap-around offset must be greater than CtbSizeY + MinCbSize" ); xConfirmPara( m_wrapAroundOffset > m_iSourceWidth, "Wrap-around offset must not be greater than the source picture width" ); - xConfirmPara( m_wrapAroundOffset % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Wrap-around offset must be an integer multiple of the specified chroma subsampling" ); + int minCUSize = m_uiCTUSize >> m_uiLog2DiffMaxMinCodingBlockSize; + xConfirmPara( m_wrapAroundOffset % minCUSize != 0, "Wrap-around offset must be an integer multiple of the specified minimum CU size" ); } #if ENABLE_SPLIT_PARALLELISM xConfirmPara( m_numSplitThreads < 1, "Number of used threads cannot be smaller than 1" ); xConfirmPara( m_numSplitThreads > PARL_SPLIT_MAX_NUM_THREADS, "Number of used threads cannot be higher than the number of actual jobs" ); -#if _MSC_VER && ENABLE_WPP_PARALLELISM - xConfirmPara( m_numSplitThreads > 1 && m_numSplitThreads != NUM_SPLIT_THREADS_IF_MSVC, "Due to poor implementation by Microsoft, NumSplitThreads cannot be set dynamically on runtime!" 
); -#endif #else xConfirmPara( m_numSplitThreads != 1, "ENABLE_SPLIT_PARALLELISM is disabled, numSplitThreads has to be 1" ); #endif -#if ENABLE_WPP_PARALLELISM - xConfirmPara( m_numWppThreads < 1, "Number of threads used for WPP-style parallelization cannot be smaller than 1" ); - xConfirmPara( m_numWppThreads > PARL_WPP_MAX_NUM_THREADS, "Number of threads used for WPP-style parallelization cannot be bigger than PARL_WPP_MAX_NUM_THREADS" ); - xConfirmPara( !m_ensureWppBitEqual && m_numWppThreads > 1, "WPP bit equality is implied when using WPP-style parallelism" ); -#if ENABLE_WPP_STATIC_LINK - xConfirmPara( m_numWppExtraLines != 0, "WPP-style extra lines out of range" ); -#else - xConfirmPara( m_numWppExtraLines < 0, "WPP-style extra lines out of range" ); -#endif -#else xConfirmPara( m_numWppThreads != 1, "ENABLE_WPP_PARALLELISM is disabled, numWppThreads has to be 1" ); xConfirmPara( m_ensureWppBitEqual, "ENABLE_WPP_PARALLELISM is disabled, cannot ensure being WPP bit-equal" ); -#endif #if SHARP_LUMA_DELTA_QP && ENABLE_QPA @@ -2036,76 +2300,22 @@ bool EncAppCfg::xCheckParameter() xConfirmPara(m_bitstreamFileName.empty(), "A bitstream file name must be specified (BitstreamFile)"); - const uint32_t maxBitDepth=(m_chromaFormatIDC==CHROMA_400) ? 
m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]); - xConfirmPara(m_bitDepthConstraint<maxBitDepth, "The internalBitDepth must not be greater than the bitDepthConstraint value"); - xConfirmPara(m_chromaFormatConstraint<m_chromaFormatIDC, "The chroma format used must not be greater than the chromaFormatConstraint value"); - - if (m_profile==Profile::MAINREXT || m_profile==Profile::HIGHTHROUGHPUTREXT) - { - xConfirmPara(m_lowerBitRateConstraintFlag==false && m_intraConstraintFlag==false, "The lowerBitRateConstraint flag cannot be false when intraConstraintFlag is false"); - xConfirmPara(m_cabacBypassAlignmentEnabledFlag && m_profile!=Profile::HIGHTHROUGHPUTREXT, "AlignCABACBeforeBypass must not be enabled unless the high throughput profile is being used."); - if (m_profile == Profile::MAINREXT) - { - const uint32_t intraIdx = m_intraConstraintFlag ? 1:0; - const uint32_t bitDepthIdx = (m_bitDepthConstraint == 8 ? 0 : (m_bitDepthConstraint ==10 ? 1 : (m_bitDepthConstraint == 12 ? 2 : (m_bitDepthConstraint == 16 ? 3 : 4 )))); - const uint32_t chromaFormatIdx = uint32_t(m_chromaFormatConstraint); - const bool bValidProfile = (bitDepthIdx > 3 || chromaFormatIdx>3) ? 
false : (validRExtProfileNames[intraIdx][bitDepthIdx][chromaFormatIdx] != NONE); - xConfirmPara(!bValidProfile, "Invalid intra constraint flag, bit depth constraint flag and chroma format constraint flag combination for a RExt profile"); - const bool bUsingGeneralRExtTools = m_transformSkipRotationEnabledFlag || - m_transformSkipContextEnabledFlag || - m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT] || - m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT] || - !m_enableIntraReferenceSmoothing || - m_persistentRiceAdaptationEnabledFlag || - m_log2MaxTransformSkipBlockSize!=2; - const bool bUsingChromaQPTool = m_cuChromaQpOffsetSubdiv >= 0; - const bool bUsingExtendedPrecision = m_extendedPrecisionProcessingFlag; - - xConfirmPara((m_chromaFormatConstraint==CHROMA_420 || m_chromaFormatConstraint==CHROMA_400) && bUsingChromaQPTool, "CU Chroma QP adjustment cannot be used for 4:0:0 or 4:2:0 RExt profiles"); - xConfirmPara(m_bitDepthConstraint != 16 && bUsingExtendedPrecision, "Extended precision can only be used in 16-bit RExt profiles"); - if (!(m_chromaFormatConstraint == CHROMA_400 && m_bitDepthConstraint == 16) && m_chromaFormatConstraint!=CHROMA_444) - { - xConfirmPara(bUsingGeneralRExtTools, "Combination of tools and profiles are not possible in the specified RExt profile."); - } - xConfirmPara( m_onePictureOnlyConstraintFlag && m_chromaFormatConstraint!=CHROMA_444, "chroma format constraint must be 4:4:4 when one-picture-only constraint flag is 1"); - xConfirmPara( m_onePictureOnlyConstraintFlag && m_bitDepthConstraint != 8 && m_bitDepthConstraint != 16, "bit depth constraint must be 8 or 16 when one-picture-only constraint flag is 1"); - xConfirmPara( m_onePictureOnlyConstraintFlag && m_framesToBeEncoded > 1, "Number of frames to be encoded must be 1 when one-picture-only constraint flag is 1."); - - if (!m_intraConstraintFlag && m_bitDepthConstraint==16 && m_chromaFormatConstraint==CHROMA_444) - { - msg( WARNING, 
"********************************************************************************************************\n"); - msg( WARNING, "** WARNING: The RExt constraint flags describe a non standard combination (used for development only) **\n"); - msg( WARNING, "********************************************************************************************************\n"); - } - } - else - { - xConfirmPara( m_chromaFormatConstraint != CHROMA_444, "chroma format constraint must be 4:4:4 in the High Throughput 4:4:4 16-bit Intra profile."); - xConfirmPara( m_bitDepthConstraint != 16, "bit depth constraint must be 4:4:4 in the High Throughput 4:4:4 16-bit Intra profile."); - xConfirmPara( m_intraConstraintFlag != 1, "intra constraint flag must be 1 in the High Throughput 4:4:4 16-bit Intra profile."); - } - } - else + xConfirmPara(m_internalBitDepth[CHANNEL_TYPE_CHROMA] != m_internalBitDepth[CHANNEL_TYPE_LUMA], "The internalBitDepth must be the same for luma and chroma"); + if (m_profile==Profile::MAIN_10 || m_profile==Profile::MAIN_444_10) { - xConfirmPara(m_bitDepthConstraint!=((m_profile==Profile::MAIN10 || m_profile==Profile::NEXT)?10:8), "BitDepthConstraint must be 8 for MAIN profile and 10 for MAIN10 profile."); - xConfirmPara(m_chromaFormatConstraint!=CHROMA_420 && m_profile!=Profile::NEXT, "ChromaFormatConstraint must be 420 for non main-RExt and non-Next profiles."); - xConfirmPara(m_intraConstraintFlag==true, "IntraConstraintFlag must be false for non main_RExt profiles."); - xConfirmPara(m_lowerBitRateConstraintFlag==false, "LowerBitrateConstraintFlag must be true for non main-RExt profiles."); - xConfirmPara(m_profile == Profile::MAINSTILLPICTURE && m_framesToBeEncoded > 1, "Number of frames to be encoded must be 1 when main still picture profile is used."); - - xConfirmPara(m_crossComponentPredictionEnabledFlag==true, "CrossComponentPrediction must not be used for non main-RExt profiles."); - xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max 
Size must be less or equal to 5."); - xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for non main-RExt profiles."); - xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for non main-RExt profiles."); - xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT]==true, "ImplicitResidualDPCM must not be enabled for non main-RExt profiles."); - xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT]==true, "ExplicitResidualDPCM must not be enabled for non main-RExt profiles."); - xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for non main-RExt profiles."); - xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for non main-RExt profiles."); - xConfirmPara(m_highPrecisionOffsetsEnabledFlag==true, "UseHighPrecisionPredictionWeighting must not be enabled for non main-RExt profiles."); - xConfirmPara(m_enableIntraReferenceSmoothing==false, "EnableIntraReferenceSmoothing must be enabled for non main-RExt profiles."); - xConfirmPara(m_cabacBypassAlignmentEnabledFlag, "AlignCABACBeforeBypass cannot be enabled for non main-RExt profiles."); - } - xConfirmPara( m_chromaFormatIDC==CHROMA_422, "4:2:2 chroma sampling format not supported with current compiler setting. 
Set compiler flag \"ENABLE_CHROMA_422\" equal to 1 for enabling 4:2:2.\n\n" ); + xConfirmPara(m_crossComponentPredictionEnabledFlag==true, "CrossComponentPrediction must not be used for given profile."); + xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max Size must be less or equal to 5 for given profile."); + xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for given profile."); + xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for given profile."); + xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT]==true, "ImplicitResidualDPCM must not be enabled for given profile."); + xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT]==true, "ExplicitResidualDPCM must not be enabled for given profile."); + xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for given profile."); + xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for given profile."); + xConfirmPara(m_highPrecisionOffsetsEnabledFlag==true, "UseHighPrecisionPredictionWeighting must not be enabled for given profile."); + xConfirmPara(m_enableIntraReferenceSmoothing==false, "EnableIntraReferenceSmoothing must be enabled for given profile."); + xConfirmPara(m_cabacBypassAlignmentEnabledFlag, "AlignCABACBeforeBypass cannot be enabled for given profile."); + } + // check range of parameters xConfirmPara( m_inputBitDepth[CHANNEL_TYPE_LUMA ] < 8, "InputBitDepth must be at least 8" ); @@ -2132,55 +2342,56 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( (m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] < m_inputBitDepth[CHANNEL_TYPE_CHROMA]), "MSB-extended bit depth for chroma channel (--MSBExtendedBitDepthC) must be greater than or equal to input bit depth for chroma channel (--InputBitDepthC)" ); xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA] > 
(m_internalBitDepth[CHANNEL_TYPE_LUMA ]<10?0:(m_internalBitDepth[CHANNEL_TYPE_LUMA ]-10)), "SaoLumaOffsetBitShift must be in the range of 0 to InternalBitDepth-10, inclusive"); - xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] > (m_internalBitDepth[CHANNEL_TYPE_CHROMA]<10?0:(m_internalBitDepth[CHANNEL_TYPE_CHROMA]-10)), "SaoChromaOffsetBitShift must be in the range of 0 to InternalBitDepthC-10, inclusive"); + xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] > (m_internalBitDepth[CHANNEL_TYPE_CHROMA]<10?0:(m_internalBitDepth[CHANNEL_TYPE_CHROMA]-10)), "SaoChromaOffsetBitShift must be in the range of 0 to InternalBitDepth-10, inclusive"); xConfirmPara( m_chromaFormatIDC >= NUM_CHROMA_FORMAT, "ChromaFormatIDC must be either 400, 420, 422 or 444" ); std::string sTempIPCSC="InputColourSpaceConvert must be empty, "+getListOfColourSpaceConverts(true); xConfirmPara( m_inputColourSpaceConvert >= NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS, sTempIPCSC.c_str() ); xConfirmPara( m_InputChromaFormatIDC >= NUM_CHROMA_FORMAT, "InputChromaFormatIDC must be either 400, 420, 422 or 444" ); xConfirmPara( m_iFrameRate <= 0, "Frame rate must be more than 1" ); - xConfirmPara( m_temporalSubsampleRatio < 1, "Temporal subsample rate must be no less than 1" ); xConfirmPara( m_framesToBeEncoded <= 0, "Total Number Of Frames encoded must be more than 0" ); xConfirmPara( m_framesToBeEncoded < m_switchPOC, "debug POC out of range" ); xConfirmPara( m_iGOPSize < 1 , "GOP Size must be greater or equal to 1" ); xConfirmPara( m_iGOPSize > 1 && m_iGOPSize % 2, "GOP Size must be a multiple of 2, if GOP Size is greater than 1" ); xConfirmPara( (m_iIntraPeriod > 0 && m_iIntraPeriod < m_iGOPSize) || m_iIntraPeriod == 0, "Intra period must be more than GOP size, or -1 , not 0" ); + xConfirmPara( m_drapPeriod < 0, "DRAP period must be greater or equal to 0" ); xConfirmPara( m_iDecodingRefreshType < 0 || m_iDecodingRefreshType > 3, "Decoding Refresh Type must be comprised between 0 and 3 included" 
); +#if HEVC_SEI if(m_iDecodingRefreshType == 3) { xConfirmPara( !m_recoveryPointSEIEnabled, "When using RecoveryPointSEI messages as RA points, recoveryPointSEI must be enabled" ); } +#endif if (m_isField) { - if (!m_pictureTimingSEIEnabled) + if (!m_frameFieldInfoSEIEnabled) { - msg( WARNING, "****************************************************************************\n"); - msg( WARNING, "** WARNING: Picture Timing SEI should be enabled for field coding! **\n"); - msg( WARNING, "****************************************************************************\n"); + msg( WARNING, "*************************************************************************************\n"); + msg( WARNING, "** WARNING: Frame field information SEI should be enabled for field coding! **\n"); + msg( WARNING, "*************************************************************************************\n"); } } - - if(m_crossComponentPredictionEnabledFlag && (m_chromaFormatIDC != CHROMA_444)) + if ( m_pictureTimingSEIEnabled && (!m_bufferingPeriodSEIEnabled)) { msg( WARNING, "****************************************************************************\n"); - msg( WARNING, "** WARNING: Cross-component prediction is specified for 4:4:4 format only **\n"); + msg( WARNING, "** WARNING: Picture Timing SEI requires Buffering Period SEI. Disabling. 
**\n"); msg( WARNING, "****************************************************************************\n"); - - m_crossComponentPredictionEnabledFlag = false; + m_pictureTimingSEIEnabled = false; } - if ( m_CUTransquantBypassFlagForce && m_bUseHADME ) + if(m_crossComponentPredictionEnabledFlag && (m_chromaFormatIDC != CHROMA_444)) { msg( WARNING, "****************************************************************************\n"); - msg( WARNING, "** WARNING: --HadamardME has been disabled due to the enabling of **\n"); - msg( WARNING, "** --CUTransquantBypassFlagForce **\n"); + msg( WARNING, "** WARNING: Cross-component prediction is specified for 4:4:4 format only **\n"); msg( WARNING, "****************************************************************************\n"); - m_bUseHADME = false; // this has been disabled so that the lambda is calculated slightly differently for lossless modes (as a result of JCTVC-R0104). + m_crossComponentPredictionEnabledFlag = false; } + xConfirmPara( m_bufferingPeriodSEIEnabled == true && m_RCCpbSize == 0, "RCCpbSize must be greater than zero, when buffering period SEI is enabled" ); + xConfirmPara (m_log2MaxTransformSkipBlockSize < 2, "Transform Skip Log2 Max Size must be at least 2 (4x4)"); @@ -2216,27 +2427,39 @@ bool EncAppCfg::xCheckParameter() #if SHARP_LUMA_DELTA_QP xConfirmPara( m_lumaLevelToDeltaQPMapping.mode && m_uiDeltaQpRD > 0, "Luma-level-based Delta QP cannot be used together with slice level multiple-QP optimization\n" ); #endif - if (m_lumaLevelToDeltaQPMapping.mode && m_lumaReshapeEnable) + if (m_lumaLevelToDeltaQPMapping.mode && m_lmcsEnabled) { - msg(WARNING, "For HDR-PQ, reshaper should be used mutual-exclusively with Luma-level-based Delta QP. If use luma DQP, turn reshaper off.\n"); - m_lumaReshapeEnable = false; + msg(WARNING, "For HDR-PQ, LMCS should be used mutual-exclusively with Luma-level-based Delta QP. 
If use LMCS, turn lumaDQP off.\n"); + m_lumaLevelToDeltaQPMapping.mode = LUMALVL_TO_DQP_DISABLED; } - if (!m_lumaReshapeEnable) + if (!m_lmcsEnabled) { m_reshapeSignalType = RESHAPE_SIGNAL_NULL; m_intraCMD = 0; } - if (m_lumaReshapeEnable && m_reshapeSignalType == RESHAPE_SIGNAL_PQ) + if (m_lmcsEnabled && m_reshapeSignalType == RESHAPE_SIGNAL_PQ) { m_intraCMD = 1; } - else if (m_lumaReshapeEnable && m_reshapeSignalType == RESHAPE_SIGNAL_SDR) + else if (m_lmcsEnabled && (m_reshapeSignalType == RESHAPE_SIGNAL_SDR || m_reshapeSignalType == RESHAPE_SIGNAL_HLG)) { m_intraCMD = 0; } else { - m_lumaReshapeEnable = false; + m_lmcsEnabled = false; + } + if (m_lmcsEnabled) + { + xConfirmPara(m_updateCtrl < 0, "Min. LMCS Update Control is 0"); + xConfirmPara(m_updateCtrl > 2, "Max. LMCS Update Control is 2"); + xConfirmPara(m_adpOption < 0, "Min. LMCS Adaptation Option is 0"); + xConfirmPara(m_adpOption > 4, "Max. LMCS Adaptation Option is 4"); + xConfirmPara(m_initialCW < 0, "Min. Initial Total Codeword is 0"); + xConfirmPara(m_initialCW > 1023, "Max. Initial Total Codeword is 1023"); + xConfirmPara(m_CSoffset < -7, "Min. LMCS Offset value is -7"); + xConfirmPara(m_CSoffset > 7, "Max. LMCS Offset value is 7"); + if (m_updateCtrl > 0 && m_adpOption > 2) { m_adpOption -= 2; } } xConfirmPara( m_cbQpOffset < -12, "Min. Chroma Cb QP Offset is -12" ); @@ -2247,7 +2470,20 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_cbQpOffsetDualTree > 12, "Max. Chroma Cb QP Offset for dual tree is 12" ); xConfirmPara( m_crQpOffsetDualTree < -12, "Min. Chroma Cr QP Offset for dual tree is -12" ); xConfirmPara( m_crQpOffsetDualTree > 12, "Max. 
Chroma Cr QP Offset for dual tree is 12" ); - + if (m_JointCbCrMode && (m_chromaFormatIDC == CHROMA_400)) + { + msg( WARNING, "****************************************************************************\n"); + msg( WARNING, "** WARNING: --JointCbCr has been disabled because the chromaFormat is 400 **\n"); + msg( WARNING, "****************************************************************************\n"); + m_JointCbCrMode = false; + } + if (m_JointCbCrMode) + { + xConfirmPara( m_cbCrQpOffset < -12, "Min. Joint Cb-Cr QP Offset is -12"); + xConfirmPara( m_cbCrQpOffset > 12, "Max. Joint Cb-Cr QP Offset is 12"); + xConfirmPara( m_cbCrQpOffsetDualTree < -12, "Min. Joint Cb-Cr QP Offset for dual tree is -12"); + xConfirmPara( m_cbCrQpOffsetDualTree > 12, "Max. Joint Cb-Cr QP Offset for dual tree is 12"); + } xConfirmPara( m_iQPAdaptationRange <= 0, "QP Adaptation Range must be more than 0" ); if (m_iDecodingRefreshType == 2) { @@ -2258,70 +2494,46 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_uiMinQT[0] < 1<<MIN_CU_LOG2, "Minimum QT size should be larger than or equal to 4"); xConfirmPara( m_uiMinQT[1] < 1<<MIN_CU_LOG2, "Minimum QT size should be larger than or equal to 4"); - xConfirmPara( m_uiCTUSize < 16, "Maximum partition width size should be larger than or equal to 16"); - xConfirmPara( m_uiCTUSize < 16, "Maximum partition height size should be larger than or equal to 16"); - xConfirmPara( (m_iSourceWidth % (1<<MIN_CU_LOG2))!=0, "Resulting coded frame width must be a multiple of the minimum unit size"); - xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0, "Resulting coded frame height must be a multiple of the minimum unit size"); - xConfirmPara( (m_iSourceWidth % (1<<MIN_CU_LOG2))!=0, "Resulting coded frame width must be a multiple of the minimum unit size"); - xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0, "Resulting coded frame height must be a multiple of the minimum unit size"); - xConfirmPara( (m_iSourceWidth % (1<<MIN_CU_LOG2))!=0, 
"Resulting coded frame width must be a multiple of the minimum unit size"); - xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0, "Resulting coded frame height must be a multiple of the minimum unit size"); + xConfirmPara( m_uiCTUSize < 32, "CTUSize must be greater than or equal to 32"); + xConfirmPara( m_uiCTUSize > 128, "CTUSize must be less than or equal to 128"); + xConfirmPara( m_uiCTUSize != 32 && m_uiCTUSize != 64 && m_uiCTUSize != 128, "CTUSize must be a power of 2 (32, 64, or 128)"); xConfirmPara( m_uiMaxCUDepth < 1, "MaxPartitionDepth must be greater than zero"); xConfirmPara( (m_uiMaxCUWidth >> m_uiMaxCUDepth) < 4, "Minimum partition width size should be larger than or equal to 8"); xConfirmPara( (m_uiMaxCUHeight >> m_uiMaxCUDepth) < 4, "Minimum partition height size should be larger than or equal to 8"); xConfirmPara( m_uiMaxCUWidth < 16, "Maximum partition width size should be larger than or equal to 16"); xConfirmPara( m_uiMaxCUHeight < 16, "Maximum partition height size should be larger than or equal to 16"); - xConfirmPara( (m_iSourceWidth % (m_uiMaxCUWidth >> (m_uiMaxCUDepth-1)))!=0, "Resulting coded frame width must be a multiple of the minimum CU size"); - xConfirmPara( (m_iSourceHeight % (m_uiMaxCUHeight >> (m_uiMaxCUDepth-1)))!=0, "Resulting coded frame height must be a multiple of the minimum CU size"); - -#if MAX_TB_SIZE_SIGNALLING + xConfirmPara( (m_iSourceWidth % (std::max(8, int(m_uiMaxCUWidth >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame width must be a multiple of Max(8, the minimum CU size)"); + xConfirmPara( (m_iSourceHeight % (std::max(8, int(m_uiMaxCUHeight >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame height must be a multiple of Max(8, the minimum CU size)"); xConfirmPara( m_log2MaxTbSize > 6, "Log2MaxTbSize must be 6 or smaller." ); -#endif + xConfirmPara( m_log2MaxTbSize < 5, "Log2MaxTbSize must be 5 or greater." 
); xConfirmPara( m_maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater."); xConfirmPara( m_maxNumMergeCand > MRG_MAX_NUM_CANDS, "MaxNumMergeCand must be no more than MRG_MAX_NUM_CANDS." ); - + xConfirmPara( m_maxNumTriangleCand > TRIANGLE_MAX_NUM_UNI_CANDS, "MaxNumTriangleCand must be no more than TRIANGLE_MAX_NUM_UNI_CANDS." ); + xConfirmPara( m_maxNumTriangleCand > m_maxNumMergeCand, "MaxNumTriangleCand must be no more than MaxNumMergeCand." ); + xConfirmPara( 0 < m_maxNumTriangleCand && m_maxNumTriangleCand < 2, "MaxNumTriangleCand must be no less than 2 unless MaxNumTriangleCand is 0." ); + xConfirmPara( m_maxNumIBCMergeCand < 1, "MaxNumIBCMergeCand must be 1 or greater." ); + xConfirmPara( m_maxNumIBCMergeCand > IBC_MRG_MAX_NUM_CANDS, "MaxNumIBCMergeCand must be no more than IBC_MRG_MAX_NUM_CANDS." ); xConfirmPara( m_maxNumAffineMergeCand < 1, "MaxNumAffineMergeCand must be 1 or greater." ); xConfirmPara( m_maxNumAffineMergeCand > AFFINE_MRG_MAX_NUM_CANDS, "MaxNumAffineMergeCand must be no more than AFFINE_MRG_MAX_NUM_CANDS." 
); if ( m_Affine == 0 ) { m_maxNumAffineMergeCand = m_SubPuMvpMode; + if (m_PROF) msg(WARNING, "PROF is forcefully disabled when Affine is off \n"); + m_PROF = false; } xConfirmPara( m_MTS < 0 || m_MTS > 3, "MTS must be greater than 0 smaller than 4" ); xConfirmPara( m_MTSIntraMaxCand < 0 || m_MTSIntraMaxCand > 5, "m_MTSIntraMaxCand must be greater than 0 and smaller than 6" ); xConfirmPara( m_MTSInterMaxCand < 0 || m_MTSInterMaxCand > 5, "m_MTSInterMaxCand must be greater than 0 and smaller than 6" ); xConfirmPara( m_MTS != 0 && m_MTSImplicit != 0, "Both explicit and implicit MTS cannot be enabled at the same time" ); - if( m_usePCM) - { - for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++) - { - xConfirmPara(((m_MSBExtendedBitDepth[channelType] > m_internalBitDepth[channelType]) && m_bPCMInputBitDepthFlag), "PCM bit depth cannot be greater than internal bit depth (PCMInputBitDepthFlag cannot be used when InputBitDepth or MSBExtendedBitDepth > InternalBitDepth)"); - } - xConfirmPara( m_uiPCMLog2MinSize < 3, "PCMLog2MinSize must be 3 or greater."); - xConfirmPara( m_uiPCMLog2MinSize > 5, "PCMLog2MinSize must be 5 or smaller."); - xConfirmPara( m_pcmLog2MaxSize > 5, "PCMLog2MaxSize must be 5 or smaller."); - xConfirmPara( m_pcmLog2MaxSize < m_uiPCMLog2MinSize, "PCMLog2MaxSize must be equal to or greater than m_uiPCMLog2MinSize."); - } - if (m_sliceMode!=NO_SLICES) + if (m_useBDPCM) { - xConfirmPara( m_sliceArgument < 1 , "SliceArgument should be larger than or equal to 1" ); + xConfirmPara(!m_useTransformSkip, "BDPCM cannot be used when transform skip is disabled."); } -#if HEVC_DEPENDENT_SLICES - if (m_sliceSegmentMode!=NO_SLICES) - { - xConfirmPara( m_sliceSegmentArgument < 1 , "SliceSegmentArgument should be larger than or equal to 1" ); - } -#endif -#if HEVC_TILES_WPP - bool tileFlag = (m_numTileColumnsMinus1 > 0 || m_numTileRowsMinus1 > 0 ); - if (m_profile!=Profile::HIGHTHROUGHPUTREXT) - { - xConfirmPara( tileFlag && 
m_entropyCodingSyncEnabledFlag, "Tiles and entropy-coding-sync (Wavefronts) can not be applied together, except in the High Throughput Intra 4:4:4 16 profile"); - } -#endif + xConfirmPara( m_iSourceWidth % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Picture width must be an integer multiple of the specified chroma subsampling"); xConfirmPara( m_iSourceHeight % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Picture height must be an integer multiple of the specified chroma subsampling"); @@ -2334,15 +2546,6 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_confWinTop % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Top conformance window offset must be an integer multiple of the specified chroma subsampling"); xConfirmPara( m_confWinBottom % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Bottom conformance window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara( m_defaultDisplayWindowFlag && !m_vuiParametersPresentFlag, "VUI needs to be enabled for default display window"); - - if (m_defaultDisplayWindowFlag) - { - xConfirmPara( m_defDispWinLeftOffset % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Left default display window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara( m_defDispWinRightOffset % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Right default display window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara( m_defDispWinTopOffset % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Top default display window offset must be an integer multiple of the specified chroma subsampling"); - xConfirmPara( m_defDispWinBottomOffset % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Bottom default display window offset must be an integer multiple of the specified chroma subsampling"); - } // max CU width and height should be power of 2 uint32_t ui = m_uiMaxCUWidth; @@ -2373,7 +2576,11 @@ bool EncAppCfg::xCheckParameter() m_GOPList[0].m_betaOffsetDiv2 = 0; 
m_GOPList[0].m_tcOffsetDiv2 = 0; m_GOPList[0].m_POC = 1; - m_GOPList[0].m_numRefPicsActive = 4; + m_RPLList0[0] = RPLEntry(); + m_RPLList1[0] = RPLEntry(); + m_RPLList0[0].m_POC = m_RPLList1[0].m_POC = 1; + m_RPLList0[0].m_numRefPicsActive = 4; + m_GOPList[0].m_numRefPicsActive0 = 4; } else { @@ -2385,8 +2592,7 @@ bool EncAppCfg::xCheckParameter() bool errorGOP=false; int checkGOP=1; int numRefs = m_isField ? 2 : 1; - int refList[MAX_NUM_REF_PICS+1]; - refList[0]=0; + int refList[MAX_NUM_REF_PICS+1] = {0}; if(m_isField) { refList[1] = 1; @@ -2430,202 +2636,253 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( abs(m_sliceChromaQpOffsetIntraOrPeriodic[1] + m_crQpOffset ) > 12, "Intra/periodic Cr QP Offset, when combined with the PPS Cr offset, exceeds supported range (-12 to 12)" ); #endif - m_extraRPSs=0; + xConfirmPara( m_fastLocalDualTreeMode < 0 || m_fastLocalDualTreeMode > 2, "FastLocalDualTreeMode must be in range [0..2]" ); + + int extraRPLs = 0; //start looping through frames in coding order until we can verify that the GOP structure is correct. - while(!verifiedGOP&&!errorGOP) + while (!verifiedGOP && !errorGOP) { - int curGOP = (checkGOP-1)%m_iGOPSize; - int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_GOPList[curGOP].m_POC; - if(m_GOPList[curGOP].m_POC<0) + int curGOP = (checkGOP - 1) % m_iGOPSize; + int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_RPLList0[curGOP].m_POC; + if (m_RPLList0[curGOP].m_POC < 0 || m_RPLList1[curGOP].m_POC < 0) { - msg( WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n"); - errorGOP=true; + msg(WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n"); + errorGOP = true; } else { //check that all reference pictures are available, or have a POC < 0 meaning they might be available in the next GOP. 
bool beforeI = false; - for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++) + for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++) { - int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i]; - if(absPOC < 0) + int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i]; + if (absPOC < 0) { - beforeI=true; + beforeI = true; } else { - bool found=false; - for(int j=0; j<numRefs; j++) + bool found = false; + for (int j = 0; j<numRefs; j++) { - if(refList[j]==absPOC) + if (refList[j] == absPOC) { - found=true; - for(int k=0; k<m_iGOPSize; k++) + found = true; + for (int k = 0; k<m_iGOPSize; k++) { - if (absPOC % (m_iGOPSize * multipleFactor) == m_GOPList[k].m_POC % (m_iGOPSize * multipleFactor)) + if (absPOC % (m_iGOPSize * multipleFactor) == m_RPLList0[k].m_POC % (m_iGOPSize * multipleFactor)) { - if(m_GOPList[k].m_temporalId==m_GOPList[curGOP].m_temporalId) + if (m_RPLList0[k].m_temporalId == m_RPLList0[curGOP].m_temporalId) { - m_GOPList[k].m_refPic = true; + m_RPLList0[k].m_refPic = true; } - m_GOPList[curGOP].m_usedByCurrPic[i]=m_GOPList[k].m_temporalId<=m_GOPList[curGOP].m_temporalId; } } } } - if(!found) + if (!found) { - msg( WARNING, "\nError: ref pic %d is not available for GOP frame %d\n",m_GOPList[curGOP].m_referencePics[i],curGOP+1); - errorGOP=true; + msg(WARNING, "\nError: ref pic %d is not available for GOP frame %d\n", m_RPLList0[curGOP].m_deltaRefPics[i], curGOP + 1); + errorGOP = true; } } } - if(!beforeI&&!errorGOP) + if (!beforeI && !errorGOP) { //all ref frames were present - if(!isOK[curGOP]) + if (!isOK[curGOP]) { numOK++; - isOK[curGOP]=true; - if(numOK==m_iGOPSize) + isOK[curGOP] = true; + if (numOK == m_iGOPSize) { - verifiedGOP=true; + verifiedGOP = true; } } } else { - //create a new GOPEntry for this frame containing all the reference pictures that were available (POC > 0) - m_GOPList[m_iGOPSize+m_extraRPSs]=m_GOPList[curGOP]; - int newRefs=0; - for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++) + //create a new RPLEntry for 
this frame containing all the reference pictures that were available (POC > 0) + m_RPLList0[m_iGOPSize + extraRPLs] = m_RPLList0[curGOP]; + m_RPLList1[m_iGOPSize + extraRPLs] = m_RPLList1[curGOP]; + int newRefs0 = 0; + for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++) { - int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i]; - if(absPOC>=0) + int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i]; + if (absPOC >= 0) { - m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[newRefs]=m_GOPList[curGOP].m_referencePics[i]; - m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[newRefs]=m_GOPList[curGOP].m_usedByCurrPic[i]; - newRefs++; + m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[newRefs0] = m_RPLList0[curGOP].m_deltaRefPics[i]; + newRefs0++; } } - int numPrefRefs = m_GOPList[curGOP].m_numRefPicsActive; + int numPrefRefs0 = m_RPLList0[curGOP].m_numRefPicsActive; - for(int offset = -1; offset>-checkGOP; offset--) + int newRefs1 = 0; + for (int i = 0; i< m_RPLList1[curGOP].m_numRefPics; i++) + { + int absPOC = curPOC - m_RPLList1[curGOP].m_deltaRefPics[i]; + if (absPOC >= 0) + { + m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[newRefs1] = m_RPLList1[curGOP].m_deltaRefPics[i]; + newRefs1++; + } + } + int numPrefRefs1 = m_RPLList1[curGOP].m_numRefPicsActive; + + for (int offset = -1; offset>-checkGOP; offset--) { //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0. 
- int offGOP = (checkGOP-1+offset)%m_iGOPSize; - int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_GOPList[offGOP].m_POC; - if(offPOC>=0&&m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId) + int offGOP = (checkGOP - 1 + offset) % m_iGOPSize; + int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_RPLList0[offGOP].m_POC; + if (offPOC >= 0 && m_RPLList0[offGOP].m_temporalId <= m_RPLList0[curGOP].m_temporalId) { - bool newRef=false; - for(int i=0; i<numRefs; i++) + bool newRef = false; + for (int i = 0; i<(newRefs0 + newRefs1); i++) { - if(refList[i]==offPOC) + if (refList[i] == offPOC) { - newRef=true; + newRef = true; } } - for(int i=0; i<newRefs; i++) + for (int i = 0; i<newRefs0; i++) { - if(m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[i]==offPOC-curPOC) + if (m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[i] == curPOC - offPOC) { - newRef=false; + newRef = false; } } - if(newRef) + if (newRef) { - int insertPoint=newRefs; + int insertPoint = newRefs0; //this picture can be added, find appropriate place in list and insert it. 
- if(m_GOPList[offGOP].m_temporalId==m_GOPList[curGOP].m_temporalId) + if (m_RPLList0[offGOP].m_temporalId == m_RPLList0[curGOP].m_temporalId) { - m_GOPList[offGOP].m_refPic = true; + m_RPLList0[offGOP].m_refPic = true; } - for(int j=0; j<newRefs; j++) + for (int j = 0; j<newRefs0; j++) { - if(m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]<offPOC-curPOC||m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]>0) + if (m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j] > curPOC - offPOC && curPOC - offPOC > 0) { insertPoint = j; break; } } - int prev = offPOC-curPOC; - int prevUsed = m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId; - for(int j=insertPoint; j<newRefs+1; j++) + int prev = curPOC - offPOC; + for (int j = insertPoint; j<newRefs0 + 1; j++) { - int newPrev = m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]; - int newUsed = m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j]; - m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]=prev; - m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j]=prevUsed; - prevUsed=newUsed; - prev=newPrev; + int newPrev = m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j]; + m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j] = prev; + prev = newPrev; } - newRefs++; + newRefs0++; } } - if(newRefs>=numPrefRefs) + if (newRefs0 >= numPrefRefs0) { break; } } - m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefPics=newRefs; - m_GOPList[m_iGOPSize+m_extraRPSs].m_POC = curPOC; - if (m_extraRPSs == 0) - { - m_GOPList[m_iGOPSize+m_extraRPSs].m_interRPSPrediction = 0; - m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefIdc = 0; - } - else + + for (int offset = -1; offset>-checkGOP; offset--) { - int rIdx = m_iGOPSize + m_extraRPSs - 1; - int refPOC = m_GOPList[rIdx].m_POC; - int refPics = m_GOPList[rIdx].m_numRefPics; - int newIdc=0; - for(int i = 0; i<= refPics; i++) + //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0. 
+ int offGOP = (checkGOP - 1 + offset) % m_iGOPSize; + int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_RPLList1[offGOP].m_POC; + if (offPOC >= 0 && m_RPLList1[offGOP].m_temporalId <= m_RPLList1[curGOP].m_temporalId) { - int deltaPOC = ((i != refPics)? m_GOPList[rIdx].m_referencePics[i] : 0); // check if the reference abs POC is >= 0 - int absPOCref = refPOC+deltaPOC; - int refIdc = 0; - for (int j = 0; j < m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefPics; j++) + bool newRef = false; + for (int i = 0; i<(newRefs0 + newRefs1); i++) { - if ( (absPOCref - curPOC) == m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]) + if (refList[i] == offPOC) { - if (m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j]) - { - refIdc = 1; - } - else + newRef = true; + } + } + for (int i = 0; i<newRefs1; i++) + { + if (m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[i] == curPOC - offPOC) + { + newRef = false; + } + } + if (newRef) + { + int insertPoint = newRefs1; + //this picture can be added, find appropriate place in list and insert it. 
+ if (m_RPLList1[offGOP].m_temporalId == m_RPLList1[curGOP].m_temporalId) + { + m_RPLList1[offGOP].m_refPic = true; + } + for (int j = 0; j<newRefs1; j++) + { + if (m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j] > curPOC - offPOC && curPOC - offPOC > 0) { - refIdc = 2; + insertPoint = j; + break; } } + int prev = curPOC - offPOC; + for (int j = insertPoint; j<newRefs1 + 1; j++) + { + int newPrev = m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j]; + m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j] = prev; + prev = newPrev; + } + newRefs1++; } - m_GOPList[m_iGOPSize+m_extraRPSs].m_refIdc[newIdc]=refIdc; - newIdc++; } - m_GOPList[m_iGOPSize+m_extraRPSs].m_interRPSPrediction = 1; - m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefIdc = newIdc; - m_GOPList[m_iGOPSize+m_extraRPSs].m_deltaRPS = refPOC - m_GOPList[m_iGOPSize+m_extraRPSs].m_POC; + if (newRefs1 >= numPrefRefs1) + { + break; + } } - curGOP=m_iGOPSize+m_extraRPSs; - m_extraRPSs++; + + m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPics = newRefs0; + m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPicsActive = min(m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPics, m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPicsActive); + m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPics = newRefs1; + m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPicsActive = min(m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPics, m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPicsActive); + curGOP = m_iGOPSize + extraRPLs; + extraRPLs++; } - numRefs=0; - for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++) + numRefs = 0; + for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++) { - int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i]; - if(absPOC >= 0) + int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i]; + if (absPOC >= 0) { - refList[numRefs]=absPOC; + refList[numRefs] = absPOC; numRefs++; } } - refList[numRefs]=curPOC; + for (int i = 0; i< m_RPLList1[curGOP].m_numRefPics; i++) + { + int absPOC = curPOC - m_RPLList1[curGOP].m_deltaRefPics[i]; 
+ if (absPOC >= 0) + { + bool alreadyExist = false; + for (int j = 0; !alreadyExist && j < numRefs; j++) + { + if (refList[j] == absPOC) + { + alreadyExist = true; + } + } + if (!alreadyExist) + { + refList[numRefs] = absPOC; + numRefs++; + } + } + } + refList[numRefs] = curPOC; numRefs++; } checkGOP++; } - xConfirmPara(errorGOP,"Invalid GOP structure given"); + xConfirmPara(errorGOP, "Invalid GOP structure given"); + m_maxTempLayer = 1; + for(int i=0; i<m_iGOPSize; i++) { if(m_GOPList[i].m_temporalId >= m_maxTempLayer) @@ -2641,9 +2898,19 @@ bool EncAppCfg::xCheckParameter() } for(int i=0; i<m_iGOPSize; i++) { - if(m_GOPList[i].m_numRefPics+1 > m_maxDecPicBuffering[m_GOPList[i].m_temporalId]) + int numRefPic = m_RPLList0[i].m_numRefPics; + for (int tmp = 0; tmp < m_RPLList1[i].m_numRefPics; tmp++) + { + bool notSame = true; + for (int jj = 0; notSame && jj < m_RPLList0[i].m_numRefPics; jj++) + { + if (m_RPLList1[i].m_deltaRefPics[tmp] == m_RPLList0[i].m_deltaRefPics[jj]) notSame = false; + } + if (notSame) numRefPic++; + } + if (numRefPic + 1 > m_maxDecPicBuffering[m_GOPList[i].m_temporalId]) { - m_maxDecPicBuffering[m_GOPList[i].m_temporalId] = m_GOPList[i].m_numRefPics + 1; + m_maxDecPicBuffering[m_GOPList[i].m_temporalId] = numRefPic + 1; } int highestDecodingNumberWithLowerPOC = 0; for(int j=0; j<m_iGOPSize; j++) @@ -2667,6 +2934,7 @@ bool EncAppCfg::xCheckParameter() m_numReorderPics[m_GOPList[i].m_temporalId] = numReorder; } } + for(int i=0; i<MAX_TLAYER-1; i++) { // a lower layer can not have higher value of m_numReorderPics than a higher layer @@ -2692,91 +2960,267 @@ bool EncAppCfg::xCheckParameter() m_maxDecPicBuffering[MAX_TLAYER-1] = m_numReorderPics[MAX_TLAYER-1] + 1; } - if(m_vuiParametersPresentFlag && m_bitstreamRestrictionFlag) + if( m_picPartitionFlag ) { - int PicSizeInSamplesY = m_iSourceWidth * m_iSourceHeight; -#if HEVC_TILES_WPP - if(tileFlag) + PPS pps; + uint32_t colIdx, rowIdx; + uint32_t remSize; + + pps.setPicWidthInLumaSamples( 
m_iSourceWidth ); + pps.setPicHeightInLumaSamples( m_iSourceHeight ); + pps.setLog2CtuSize( floorLog2(m_uiCTUSize) ); + + // set default tile column if not provided + if( m_tileColumnWidth.size() == 0 ) + { + m_tileColumnWidth.push_back( pps.getPicWidthInCtu() ); + } + // set default tile row if not provided + if( m_tileRowHeight.size() == 0 ) + { + m_tileRowHeight.push_back( pps.getPicHeightInCtu() ); + } + + // remove any tile columns that can be specified implicitly + while( m_tileColumnWidth.size() > 1 && m_tileColumnWidth.end()[-1] == m_tileColumnWidth.end()[-2] ) + { + m_tileColumnWidth.pop_back(); + } + + // remove any tile rows that can be specified implicitly + while( m_tileRowHeight.size() > 1 && m_tileRowHeight.end()[-1] == m_tileRowHeight.end()[-2] ) + { + m_tileRowHeight.pop_back(); + } + + // setup tiles in temporary PPS structure + remSize = pps.getPicWidthInCtu(); + for( colIdx=0; remSize > 0 && colIdx<m_tileColumnWidth.size(); colIdx++ ) { - int maxTileWidth = 0; - int maxTileHeight = 0; - int widthInCU = (m_iSourceWidth % m_uiMaxCUWidth) ? m_iSourceWidth/m_uiMaxCUWidth + 1: m_iSourceWidth/m_uiMaxCUWidth; - int heightInCU = (m_iSourceHeight % m_uiMaxCUHeight) ? 
m_iSourceHeight/m_uiMaxCUHeight + 1: m_iSourceHeight/m_uiMaxCUHeight; - if(m_tileUniformSpacingFlag) + xConfirmPara(m_tileColumnWidth[ colIdx ] == 0, "Tile column widths cannot be equal to 0"); + m_tileColumnWidth[ colIdx ] = std::min( remSize, m_tileColumnWidth[ colIdx ]); + pps.addTileColumnWidth( m_tileColumnWidth[ colIdx ] ); + remSize -= m_tileColumnWidth[ colIdx ]; + } + m_tileColumnWidth.resize( colIdx ); + pps.setNumExpTileColumns( (uint32_t)m_tileColumnWidth.size() ); + remSize = pps.getPicHeightInCtu(); + for( rowIdx=0; remSize > 0 && rowIdx<m_tileRowHeight.size(); rowIdx++ ) + { + xConfirmPara(m_tileRowHeight[ rowIdx ] == 0, "Tile row heights cannot be equal to 0"); + m_tileRowHeight[ rowIdx ] = std::min( remSize, m_tileRowHeight[ rowIdx ]); + pps.addTileRowHeight( m_tileRowHeight[ rowIdx ] ); + remSize -= m_tileRowHeight[ rowIdx ]; + } + m_tileRowHeight.resize( rowIdx ); + pps.setNumExpTileRows( (uint32_t)m_tileRowHeight.size() ); + pps.initTiles(); + xConfirmPara(pps.getNumTileColumns() > getMaxTileColsByLevel( m_level ), "Number of tile columns exceeds maximum number allowed according to specified level"); + xConfirmPara(pps.getNumTileRows() > getMaxTileRowsByLevel( m_level ), "Number of tile rows exceeds maximum number allowed according to specified level"); + m_numTileCols = pps.getNumTileColumns(); + m_numTileRows = pps.getNumTileRows(); + + // rectangular slices + if( !m_rasterSliceFlag ) + { + uint32_t sliceIdx; + bool needTileIdxDelta = false; + + // generate slice list for the simplified fixed-rectangular-slice-size config option + if( m_rectSliceFixedWidth > 0 && m_rectSliceFixedHeight > 0 ) { - maxTileWidth = m_uiMaxCUWidth*((widthInCU+m_numTileColumnsMinus1)/(m_numTileColumnsMinus1+1)); - maxTileHeight = m_uiMaxCUHeight*((heightInCU+m_numTileRowsMinus1)/(m_numTileRowsMinus1+1)); - // if only the last tile-row is one treeblock higher than the others - // the maxTileHeight becomes smaller if the last row of treeblocks has lower height than the 
others - if(!((heightInCU-1)%(m_numTileRowsMinus1+1))) - { - maxTileHeight = maxTileHeight - m_uiMaxCUHeight + (m_iSourceHeight % m_uiMaxCUHeight); - } - // if only the last tile-column is one treeblock wider than the others - // the maxTileWidth becomes smaller if the last column of treeblocks has lower width than the others - if(!((widthInCU-1)%(m_numTileColumnsMinus1+1))) + int tileIdx = 0; + m_rectSlicePos.clear(); + while( tileIdx < pps.getNumTiles() ) { - maxTileWidth = maxTileWidth - m_uiMaxCUWidth + (m_iSourceWidth % m_uiMaxCUWidth); + uint32_t startTileX = tileIdx % pps.getNumTileColumns(); + uint32_t startTileY = tileIdx / pps.getNumTileColumns(); + uint32_t startCtuX = pps.getTileColumnBd( startTileX ); + uint32_t startCtuY = pps.getTileRowBd( startTileY ); + uint32_t stopCtuX = (startTileX + m_rectSliceFixedWidth) >= pps.getNumTileColumns() ? pps.getPicWidthInCtu() - 1 : pps.getTileColumnBd( startTileX + m_rectSliceFixedWidth ) - 1; + uint32_t stopCtuY = (startTileY + m_rectSliceFixedHeight) >= pps.getNumTileRows() ? pps.getPicHeightInCtu() - 1 : pps.getTileRowBd( startTileY + m_rectSliceFixedHeight ) - 1; + uint32_t stopTileX = pps.ctuToTileCol( stopCtuX ); + uint32_t stopTileY = pps.ctuToTileRow( stopCtuY ); + + // add rectangular slice to list + m_rectSlicePos.push_back( startCtuY * pps.getPicWidthInCtu() + startCtuX ); + m_rectSlicePos.push_back( stopCtuY * pps.getPicWidthInCtu() + stopCtuX ); + + // get slice size in tiles + uint32_t sliceWidth = stopTileX - startTileX + 1; + uint32_t sliceHeight = stopTileY - startTileY + 1; + + // move to next tile in raster scan order + tileIdx += sliceWidth; + if( tileIdx % pps.getNumTileColumns() == 0 ) + { + tileIdx += (sliceHeight - 1) * pps.getNumTileColumns(); + } } } - else // not uniform spacing + + xConfirmPara( m_rectSlicePos.size() & 1, "Odd number of rectangular slice positions provided. 
Rectangular slice positions must be specified in pairs of (top-left / bottom-right) raster-scan CTU addresses."); + + // set default slice size if not provided + if( m_rectSlicePos.size() == 0 ) { - if(m_numTileColumnsMinus1<1) + m_rectSlicePos.push_back( 0 ); + m_rectSlicePos.push_back( pps.getPicWidthInCtu() * pps.getPicHeightInCtu() - 1 ); + } + pps.setNumSlicesInPic( (uint32_t)(m_rectSlicePos.size() >> 1) ); + xConfirmPara(pps.getNumSlicesInPic() > getMaxSlicesByLevel( m_level ), "Number of rectangular slices exceeds maximum number allowed according to specified level"); + pps.initRectSlices(); + + // set slice parameters from CTU addresses + for( sliceIdx = 0; sliceIdx < pps.getNumSlicesInPic(); sliceIdx++ ) + { + xConfirmPara( m_rectSlicePos[2*sliceIdx] >= pps.getPicWidthInCtu() * pps.getPicHeightInCtu(), "Rectangular slice position exceeds total number of CTU in picture."); + xConfirmPara( m_rectSlicePos[2*sliceIdx + 1] >= pps.getPicWidthInCtu() * pps.getPicHeightInCtu(), "Rectangular slice position exceeds total number of CTU in picture."); + + // map raster scan CTU address to X/Y position + uint32_t startCtuX = m_rectSlicePos[2*sliceIdx] % pps.getPicWidthInCtu(); + uint32_t startCtuY = m_rectSlicePos[2*sliceIdx] / pps.getPicWidthInCtu(); + uint32_t stopCtuX = m_rectSlicePos[2*sliceIdx + 1] % pps.getPicWidthInCtu(); + uint32_t stopCtuY = m_rectSlicePos[2*sliceIdx + 1] / pps.getPicWidthInCtu(); + + // get corresponding tile index + uint32_t startTileX = pps.ctuToTileCol( startCtuX ); + uint32_t startTileY = pps.ctuToTileRow( startCtuY ); + uint32_t stopTileX = pps.ctuToTileCol( stopCtuX ); + uint32_t stopTileY = pps.ctuToTileRow( stopCtuY ); + uint32_t tileIdx = startTileY * pps.getNumTileColumns() + startTileX; + + // get slice size in tiles + uint32_t sliceWidth = stopTileX - startTileX + 1; + uint32_t sliceHeight = stopTileY - startTileY + 1; + + // check for slice / tile alignment + xConfirmPara( startCtuX != pps.getTileColumnBd( startTileX ), 
"Rectangular slice position does not align with a left tile edge."); + xConfirmPara( stopCtuX != (pps.getTileColumnBd( stopTileX + 1 ) - 1), "Rectangular slice position does not align with a right tile edge."); + if( sliceWidth > 1 || sliceHeight > 1 ) { - maxTileWidth = m_iSourceWidth; + xConfirmPara( startCtuY != pps.getTileRowBd( startTileY ), "Rectangular slice position does not align with a top tile edge."); + xConfirmPara( stopCtuY != (pps.getTileRowBd( stopTileY + 1 ) - 1), "Rectangular slice position does not align with a bottom tile edge."); } - else + + // set slice size and tile index + pps.setSliceWidthInTiles( sliceIdx, sliceWidth ); + pps.setSliceHeightInTiles( sliceIdx, sliceHeight ); + pps.setSliceTileIdx( sliceIdx, tileIdx ); + if( sliceIdx > 0 && !needTileIdxDelta ) { - int accColumnWidth = 0; - for(int col=0; col<(m_numTileColumnsMinus1); col++) + uint32_t lastTileIdx = pps.getSliceTileIdx( sliceIdx-1 ); + lastTileIdx += pps.getSliceWidthInTiles( sliceIdx-1 ); + if( lastTileIdx % pps.getNumTileColumns() == 0) { - maxTileWidth = m_tileColumnWidth[col]>maxTileWidth ? m_tileColumnWidth[col]:maxTileWidth; - accColumnWidth += m_tileColumnWidth[col]; + lastTileIdx += (pps.getSliceHeightInTiles( sliceIdx-1 ) - 1) * pps.getNumTileColumns(); + } + if( lastTileIdx != tileIdx ) + { + needTileIdxDelta = true; } - maxTileWidth = (widthInCU-accColumnWidth)>maxTileWidth ? m_uiMaxCUWidth*(widthInCU-accColumnWidth):m_uiMaxCUWidth*maxTileWidth; - } - if(m_numTileRowsMinus1<1) - { - maxTileHeight = m_iSourceHeight; } - else + + // special case for multiple slices within a single tile + if( sliceWidth == 1 && sliceHeight == 1 ) { - int accRowHeight = 0; - for(int row=0; row<(m_numTileRowsMinus1); row++) + uint32_t firstSliceIdx = sliceIdx; + uint32_t numSlicesInTile = 1; + pps.setSliceHeightInCtu( sliceIdx, stopCtuY - startCtuY + 1 ); + + while( sliceIdx < pps.getNumSlicesInPic()-1 ) { - maxTileHeight = m_tileRowHeight[row]>maxTileHeight ? 
m_tileRowHeight[row]:maxTileHeight; - accRowHeight += m_tileRowHeight[row]; + uint32_t nextTileIdx; + startCtuX = m_rectSlicePos[2*(sliceIdx+1)] % pps.getPicWidthInCtu(); + startCtuY = m_rectSlicePos[2*(sliceIdx+1)] / pps.getPicWidthInCtu(); + stopCtuX = m_rectSlicePos[2*(sliceIdx+1) + 1] % pps.getPicWidthInCtu(); + stopCtuY = m_rectSlicePos[2*(sliceIdx+1) + 1] / pps.getPicWidthInCtu(); + startTileX = pps.ctuToTileCol( startCtuX ); + startTileY = pps.ctuToTileRow( startCtuY ); + stopTileX = pps.ctuToTileCol( stopCtuX ); + stopTileY = pps.ctuToTileRow( stopCtuY ); + nextTileIdx = startTileY * pps.getNumTileColumns() + startTileX; + sliceWidth = stopTileX - startTileX + 1; + sliceHeight = stopTileY - startTileY + 1; + if(nextTileIdx != tileIdx || sliceWidth != 1 || sliceHeight != 1) + { + break; + } + numSlicesInTile++; + sliceIdx++; + pps.setSliceWidthInTiles( sliceIdx, 1 ); + pps.setSliceHeightInTiles( sliceIdx, 1 ); + pps.setSliceTileIdx( sliceIdx, tileIdx ); + pps.setSliceHeightInCtu( sliceIdx, stopCtuY - startCtuY + 1 ); } - maxTileHeight = (heightInCU-accRowHeight)>maxTileHeight ? 
m_uiMaxCUHeight*(heightInCU-accRowHeight):m_uiMaxCUHeight*maxTileHeight; + pps.setNumSlicesInTile( firstSliceIdx, numSlicesInTile ); } } - int maxSizeInSamplesY = maxTileWidth*maxTileHeight; - m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/maxSizeInSamplesY-4; - } - else if(m_entropyCodingSyncEnabledFlag) - { - m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/((2*m_iSourceHeight+m_iSourceWidth)*m_uiMaxCUHeight)-4; - } - else if(m_sliceMode == FIXED_NUMBER_OF_CTU) - { - m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/(m_sliceArgument*m_uiMaxCUWidth*m_uiMaxCUHeight)-4; + pps.setTileIdxDeltaPresentFlag( needTileIdxDelta ); + m_tileIdxDeltaPresentFlag = needTileIdxDelta; + + // check rectangular slice mapping and full picture CTU coverage + pps.initRectSliceMap(); + + // store rectangular slice parameters from temporary PPS structure + m_numSlicesInPic = pps.getNumSlicesInPic(); + m_rectSlices.resize( pps.getNumSlicesInPic() ); + for( sliceIdx = 0; sliceIdx < pps.getNumSlicesInPic(); sliceIdx++ ) + { + m_rectSlices[sliceIdx].setSliceWidthInTiles( pps.getSliceWidthInTiles(sliceIdx) ); + m_rectSlices[sliceIdx].setSliceHeightInTiles( pps.getSliceHeightInTiles(sliceIdx) ); + m_rectSlices[sliceIdx].setNumSlicesInTile( pps.getNumSlicesInTile(sliceIdx) ); + m_rectSlices[sliceIdx].setSliceHeightInCtu( pps.getSliceHeightInCtu(sliceIdx) ); + m_rectSlices[sliceIdx].setTileIdx( pps.getSliceTileIdx(sliceIdx) ); + } } + // raster-scan slices else { - m_minSpatialSegmentationIdc = 0; - } -#else - if(m_sliceMode == FIXED_NUMBER_OF_CTU) - { - m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/(m_sliceArgument*m_uiMaxCUWidth*m_uiMaxCUHeight)-4; + uint32_t listIdx = 0; + uint32_t remTiles = pps.getNumTiles(); + + // set default slice size if not provided + if( m_rasterSliceSize.size() == 0 ) + { + m_rasterSliceSize.push_back( remTiles ); + } + + // set raster slice sizes + while( remTiles > 0 ) + { + // truncate if size exceeds number of remaining tiles + if( listIdx < 
m_rasterSliceSize.size() ) + { + m_rasterSliceSize[listIdx] = std::min( remTiles, m_rasterSliceSize[listIdx] ); + remTiles -= m_rasterSliceSize[listIdx]; + } + // replicate last size uniformly as needed to cover the remainder of the picture + else + { + m_rasterSliceSize.push_back( std::min( remTiles, m_rasterSliceSize.back() ) ); + remTiles -= m_rasterSliceSize.back(); + } + listIdx++; + } + // shrink list if too many sizes were provided + m_rasterSliceSize.resize( listIdx ); + + m_numSlicesInPic = (uint32_t)m_rasterSliceSize.size(); + xConfirmPara(m_rasterSliceSize.size() > getMaxSlicesByLevel( m_level ), "Number of raster-scan slices exceeds maximum number allowed according to specified level"); } -#endif + } + else + { + m_numTileCols = 1; + m_numTileRows = 1; + m_numSlicesInPic = 1; } - if ((m_MCTSEncConstraint) && (m_bLFCrossTileBoundaryFlag)) + if ((m_MCTSEncConstraint) && (!m_disableLFCrossTileBoundaryFlag)) { printf("Warning: Constrained Encoding for Motion Constrained Tile Sets (MCTS) is enabled. Disabling filtering across tile boundaries!\n"); - m_bLFCrossTileBoundaryFlag = false; + m_disableLFCrossTileBoundaryFlag = true; } if ((m_MCTSEncConstraint) && (m_TMVPModeId)) { @@ -2795,6 +3239,58 @@ bool EncAppCfg::xCheckParameter() m_BIO = false; } + // If m_PPSorSliceFlag is equal to 1, for each PPS parameter below, + // 0: value is signaled in slice header + // >0: value is derived from PPS parameter as value - 1 + switch (m_PPSorSliceMode) + { + case 0: // All parameter values are signaled in slice header + m_constantSliceHeaderParamsEnabledFlag = 0; + m_PPSDepQuantEnabledIdc = 0; + m_PPSRefPicListSPSIdc0 = 0; + m_PPSRefPicListSPSIdc1 = 0; + m_PPSMvdL1ZeroIdc = 0; + m_PPSCollocatedFromL0Idc = 0; + m_PPSSixMinusMaxNumMergeCandPlus1 = 0; + m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = 0; + break; + case 1: // RA setting + m_constantSliceHeaderParamsEnabledFlag = 1; + m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 
1 : 0) + 1; + m_PPSRefPicListSPSIdc0 = 0; + m_PPSRefPicListSPSIdc1 = 0; + m_PPSMvdL1ZeroIdc = 0; + m_PPSCollocatedFromL0Idc = 0; + m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; + m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = m_maxNumMergeCand - m_maxNumTriangleCand + 1; + break; + case 2: // LDB setting + m_constantSliceHeaderParamsEnabledFlag = 1; + m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1; + m_PPSRefPicListSPSIdc0 = 2; + m_PPSRefPicListSPSIdc1 = 2; + m_PPSMvdL1ZeroIdc = 2; + m_PPSCollocatedFromL0Idc = 1; + m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; + m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = m_maxNumMergeCand - m_maxNumTriangleCand + 1; + break; + case 3: // LDP setting + m_constantSliceHeaderParamsEnabledFlag = 1; + m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1; + m_PPSRefPicListSPSIdc0 = 2; + m_PPSRefPicListSPSIdc1 = 2; + m_PPSMvdL1ZeroIdc = 0; + m_PPSCollocatedFromL0Idc = 0; + m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1; + m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = 0; + break; + default: + THROW("Invalid value for PPSorSliceMode"); + } + xConfirmPara(m_drapPeriod > 0 && m_PPSRefPicListSPSIdc0 > 0, "PPSRefPicListSPSIdc0 shall be 0 when DRAP is used. This can be fixed by setting PPSorSliceMode=0."); + xConfirmPara(m_drapPeriod > 0 && m_PPSRefPicListSPSIdc1 > 0, "PPSRefPicListSPSIdc1 shall be 0 when DRAP is used. 
This can be fixed by setting PPSorSliceMode=0."); + +#if HEVC_SEI if (m_toneMappingInfoSEIEnabled) { xConfirmPara( m_toneMapCodedDataBitDepth < 8 || m_toneMapCodedDataBitDepth > 14 , "SEIToneMapCodedDataBitDepth must be in rage 8 to 14"); @@ -2827,6 +3323,8 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( (m_chromaFormatIDC == CHROMA_400 ), "chromaResamplingFilterSEI is not allowed to be present when ChromaFormatIDC is equal to zero (4:0:0)" ); xConfirmPara(m_vuiParametersPresentFlag && m_chromaLocInfoPresentFlag && (m_chromaSampleLocTypeTopField != m_chromaSampleLocTypeBottomField ), "When chromaResamplingFilterSEI is enabled, ChromaSampleLocTypeTopField has to be equal to ChromaSampleLocTypeBottomField" ); } +#endif + xConfirmPara( m_sariAspectRatioIdc < 0 || m_sariAspectRatioIdc > 255, "SEISARISampleAspectRatioIdc must be in the range of 0 to 255"); if ( m_RCEnableRateControl ) { @@ -2853,42 +3351,59 @@ bool EncAppCfg::xCheckParameter() { xConfirmPara( m_RCCpbSaturationEnabled != 0, "Target bits saturation cannot be processed without Rate control" ); } - if (m_vuiParametersPresentFlag) - { - xConfirmPara(m_RCTargetBitrate == 0, "A target bit rate is required to be set for VUI/HRD parameters."); - if (m_RCCpbSize == 0) - { - msg( WARNING, "Warning: CPB size is set equal to zero. 
Adjusting value to be equal to TargetBitrate!\n"); - m_RCCpbSize = m_RCTargetBitrate; - } - } #endif - xConfirmPara(!m_TransquantBypassEnabledFlag && m_CUTransquantBypassFlagForce, "CUTransquantBypassFlagForce cannot be 1 when TransquantBypassEnableFlag is 0"); - - xConfirmPara(m_log2ParallelMergeLevel < 2, "Log2ParallelMergeLevel should be larger than or equal to 2"); - if (m_framePackingSEIEnabled) { xConfirmPara(m_framePackingSEIType < 3 || m_framePackingSEIType > 5 , "SEIFramePackingType must be in rage 3 to 5"); } - if (m_segmentedRectFramePackingSEIEnabled) + if( m_erpSEIEnabled && !m_erpSEICancelFlag ) { - xConfirmPara(m_framePackingSEIEnabled , "SEISegmentedRectFramePacking must be 0 when SEIFramePacking is 1"); + xConfirmPara( m_erpSEIGuardBandType < 0 || m_erpSEIGuardBandType > 8, "SEIEquirectangularprojectionGuardBandType must be in the range of 0 to 7"); + xConfirmPara( (m_chromaFormatIDC == CHROMA_420 || m_chromaFormatIDC == CHROMA_422) && (m_erpSEILeftGuardBandWidth%2 == 1), "SEIEquirectangularprojectionLeftGuardBandWidth must be an even number for 4:2:0 or 4:2:2 chroma format"); + xConfirmPara( (m_chromaFormatIDC == CHROMA_420 || m_chromaFormatIDC == CHROMA_422) && (m_erpSEIRightGuardBandWidth%2 == 1), "SEIEquirectangularprojectionRightGuardBandWidth must be an even number for 4:2:0 or 4:2:2 chroma format"); } -#if HEVC_TILES_WPP - if((m_numTileColumnsMinus1 <= 0) && (m_numTileRowsMinus1 <= 0) && m_tmctsSEIEnabled) + if( m_sphereRotationSEIEnabled && !m_sphereRotationSEICancelFlag ) { - msg( WARNING, "Warning: SEITempMotionConstrainedTileSets is set to false to disable temporal motion-constrained tile sets SEI message because there are no tiles enabled.\n"); - m_tmctsSEIEnabled = false; + xConfirmPara( m_sphereRotationSEIYaw < -(180<<16) || m_sphereRotationSEIYaw > (180<<16)-1, "SEISphereRotationYaw must be in the range of -11 796 480 to 11 796 479"); + xConfirmPara( m_sphereRotationSEIPitch < -(90<<16) || m_sphereRotationSEIYaw > (90<<16), 
"SEISphereRotationPitch must be in the range of -5 898 240 to 5 898 240"); + xConfirmPara( m_sphereRotationSEIRoll < -(180<<16) || m_sphereRotationSEIYaw > (180<<16)-1, "SEISphereRotationRoll must be in the range of -11 796 480 to 11 796 479"); } -#endif - if(m_timeCodeSEIEnabled) + if ( m_omniViewportSEIEnabled && !m_omniViewportSEICancelFlag ) { - xConfirmPara(m_timeCodeSEINumTs > MAX_TIMECODE_SEI_SETS, "Number of time sets cannot exceed 3"); + xConfirmPara( m_omniViewportSEIId < 0 || m_omniViewportSEIId > 1023, "SEIomniViewportId must be in the range of 0 to 1023"); + xConfirmPara( m_omniViewportSEICntMinus1 < 0 || m_omniViewportSEICntMinus1 > 15, "SEIomniViewportCntMinus1 must be in the range of 0 to 15"); + for ( uint32_t i=0; i<=m_omniViewportSEICntMinus1; i++ ) + { + xConfirmPara( m_omniViewportSEIAzimuthCentre[i] < -(180<<16) || m_omniViewportSEIAzimuthCentre[i] > (180<<16)-1, "SEIOmniViewportAzimuthCentre must be in the range of -11 796 480 to 11 796 479"); + xConfirmPara( m_omniViewportSEIElevationCentre[i] < -(90<<16) || m_omniViewportSEIElevationCentre[i] > (90<<16), "SEIOmniViewportSEIElevationCentre must be in the range of -5 898 240 to 5 898 240"); + xConfirmPara( m_omniViewportSEITiltCentre[i] < -(180<<16) || m_omniViewportSEITiltCentre[i] > (180<<16)-1, "SEIOmniViewportTiltCentre must be in the range of -11 796 480 to 11 796 479"); + xConfirmPara( m_omniViewportSEIHorRange[i] < 1 || m_omniViewportSEIHorRange[i] > (360<<16), "SEIOmniViewportHorRange must be in the range of 1 to 360*2^16"); + xConfirmPara( m_omniViewportSEIVerRange[i] < 1 || m_omniViewportSEIVerRange[i] > (180<<16), "SEIOmniViewportVerRange must be in the range of 1 to 180*2^16"); + } + } + + if (m_gcmpSEIEnabled && !m_gcmpSEICancelFlag) + { + xConfirmPara( m_gcmpSEIMappingFunctionType < 0 || m_gcmpSEIMappingFunctionType > 2, "SEIGcmpMappingFunctionType must be in the range of 0 to 2"); + int numFace = m_gcmpSEIPackingType == 4 || m_gcmpSEIPackingType == 5 ? 
5 : 6; + for ( int i = 0; i < numFace; i++ ) + { + xConfirmPara( m_gcmpSEIFaceIndex[i] < 0 || m_gcmpSEIFaceIndex[i] > 5, "SEIGcmpFaceIndex must be in the range of 0 to 5"); + xConfirmPara( m_gcmpSEIFaceRotation[i] < 0 || m_gcmpSEIFaceRotation[i] > 3, "SEIGcmpFaceRotation must be in the range of 0 to 3"); + if (m_gcmpSEIMappingFunctionType == 2) + { + xConfirmPara( m_gcmpSEIFunctionCoeffU[i] <= 0.0 || m_gcmpSEIFunctionCoeffU[i] > 1.0, "SEIGcmpFunctionCoeffU must be in the range (0, 1]"); + xConfirmPara( m_gcmpSEIFunctionCoeffV[i] <= 0.0 || m_gcmpSEIFunctionCoeffV[i] > 1.0, "SEIGcmpFunctionCoeffV must be in the range (0, 1]"); + } + } + if (m_gcmpSEIGuardBandFlag) + { + xConfirmPara( m_gcmpSEIGuardBandSamplesMinus1 < 0 || m_gcmpSEIGuardBandSamplesMinus1 > 15, "SEIGcmpGuardBandSamplesMinus1 must be in the range of 0 to 15"); + } } #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI @@ -2898,10 +3413,16 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_decodeBitstreams[0] == m_bitstreamFileName, "Debug bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara( m_decodeBitstreams[1] == m_bitstreamFileName, "Decode2 bitstream and the output bitstream cannot be equal.\n" ); xConfirmPara(unsigned(m_LMChroma) > 1, "LMMode exceeds range (0 to 1)"); + if (m_gopBasedTemporalFilterEnabled) + { + xConfirmPara(m_temporalSubsampleRatio != 1, "GOP Based Temporal Filter only support Temporal sub-sample ratio 1"); + } #if EXTENSION_360_VIDEO check_failed |= m_ext360.verifyParameters(); #endif + xConfirmPara(m_useBDPCM < 0 || m_useBDPCM > 2, "BDPCM must be in range 0..2"); + #undef xConfirmPara return check_failed; } @@ -2948,43 +3469,39 @@ void EncAppCfg::xPrintParameter() msg( DETAILS, "Frame/Field : Frame based coding\n" ); msg( DETAILS, "Frame index : %u - %d (%d frames)\n", m_FrameSkip, m_FrameSkip + m_framesToBeEncoded - 1, m_framesToBeEncoded ); } - if (m_profile == Profile::MAINREXT) { - ExtendedProfileName validProfileName; - if (m_onePictureOnlyConstraintFlag) 
- { - validProfileName = m_bitDepthConstraint == 8 ? MAIN_444_STILL_PICTURE : (m_bitDepthConstraint == 16 ? MAIN_444_16_STILL_PICTURE : NONE); - } - else - { - const uint32_t intraIdx = m_intraConstraintFlag ? 1:0; - const uint32_t bitDepthIdx = (m_bitDepthConstraint == 8 ? 0 : (m_bitDepthConstraint ==10 ? 1 : (m_bitDepthConstraint == 12 ? 2 : (m_bitDepthConstraint == 16 ? 3 : 4 )))); - const uint32_t chromaFormatIdx = uint32_t(m_chromaFormatConstraint); - validProfileName = (bitDepthIdx > 3 || chromaFormatIdx>3) ? NONE : validRExtProfileNames[intraIdx][bitDepthIdx][chromaFormatIdx]; - } - std::string rextSubProfile; - if (validProfileName!=NONE) - { - rextSubProfile=enumToString(strToExtendedProfile, sizeof(strToExtendedProfile)/sizeof(*strToExtendedProfile), validProfileName); - } - if (rextSubProfile == "main_444_16") + msg( DETAILS, "Profile : %s\n", profileToString(m_profile) ); + } + msg( DETAILS, "CU size / depth / total-depth : %d / %d / %d\n", m_uiMaxCUWidth, m_uiMaxCUDepth, m_uiMaxCodingDepth ); + msg(DETAILS, "subpicture present flag : %d\n", m_subPicPresentFlag); + if (m_subPicPresentFlag) + { + msg(DETAILS, "number of subpictures : %d\n", m_numSubPics); + for (int i = 0; i < m_numSubPics; i++) { - rextSubProfile="main_444_16 [NON STANDARD]"; + msg(DETAILS, "[%d]th subpictures location :[%d %d]\n", i, m_subPicCtuTopLeftX[i], m_subPicCtuTopLeftY[i]); + msg(DETAILS, "[%d]th subpictures size :[%d %d]\n", i, m_subPicWidth[i], m_subPicHeight[i]); + msg(DETAILS, "[%d]th subpictures treated as picture flag :%d\n", i, m_subPicTreatedAsPicFlag[i]); + msg(DETAILS, "loop filter cross [%d]th subpictures enabled flag :%d\n", i, m_loopFilterAcrossSubpicEnabledFlag[i]); + } - msg( DETAILS, "Profile : %s (%s)\n", profileToString(m_profile), (rextSubProfile.empty())?"INVALID REXT PROFILE":rextSubProfile.c_str() ); } - else + msg(DETAILS, "subpicture ID present flag : %d\n", m_subPicIdPresentFlag); + if (m_subPicIdPresentFlag) { - msg( DETAILS, "Profile : %s\n", 
profileToString(m_profile) ); + msg(DETAILS, "subpicture ID signalling present flag : %d\n", m_subPicIdSignallingPresentFlag); + for (int i = 0; i < m_numSubPics; i++) + { + msg(DETAILS, "[%d]th subpictures ID length :%d\n", i, m_subPicIdLen); + msg(DETAILS, "[%d]th subpictures ID :%d\n", i, m_subPicId[i]); + + } } - msg( DETAILS, "CU size / depth / total-depth : %d / %d / %d\n", m_uiMaxCUWidth, m_uiMaxCUDepth, m_uiMaxCodingDepth ); -#if MAX_TB_SIZE_SIGNALLING msg( DETAILS, "Max TB size : %d \n", 1 << m_log2MaxTbSize ); -#endif - msg( DETAILS, "Min PCM size : %d\n", 1 << m_uiPCMLog2MinSize); msg( DETAILS, "Motion search range : %d\n", m_iSearchRange ); msg( DETAILS, "Intra period : %d\n", m_iIntraPeriod ); msg( DETAILS, "Decoding refresh type : %d\n", m_iDecodingRefreshType ); + msg( DETAILS, "DRAP period : %d\n", m_drapPeriod ); #if QP_SWITCHING_FOR_PARALLEL if (m_qpIncrementAtSourceFrame.bPresent) { @@ -3006,8 +3523,6 @@ void EncAppCfg::xPrintParameter() msg( DETAILS, "Input bit depth : (Y:%d, C:%d)\n", m_inputBitDepth[CHANNEL_TYPE_LUMA], m_inputBitDepth[CHANNEL_TYPE_CHROMA] ); msg( DETAILS, "MSB-extended bit depth : (Y:%d, C:%d)\n", m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA], m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] ); msg( DETAILS, "Internal bit depth : (Y:%d, C:%d)\n", m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA] ); - msg( DETAILS, "PCM sample bit depth : (Y:%d, C:%d)\n", m_bPCMInputBitDepthFlag ? m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA] : m_internalBitDepth[CHANNEL_TYPE_LUMA], - m_bPCMInputBitDepthFlag ? m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] : m_internalBitDepth[CHANNEL_TYPE_CHROMA] ); msg( DETAILS, "Intra reference smoothing : %s\n", (m_enableIntraReferenceSmoothing ? "Enabled" : "Disabled") ); msg( DETAILS, "cu_chroma_qp_offset_subdiv : %d\n", m_cuChromaQpOffsetSubdiv); msg( DETAILS, "extended_precision_processing_flag : %s\n", (m_extendedPrecisionProcessingFlag ? 
"Enabled" : "Disabled") ); @@ -3035,7 +3550,7 @@ void EncAppCfg::xPrintParameter() } msg( DETAILS, "RateControl : %d\n", m_RCEnableRateControl ); - msg( DETAILS, "WPMethod : %d\n", int(m_weightedPredictionMethod)); + msg( DETAILS, "WeightedPredMethod : %d\n", int(m_weightedPredictionMethod)); if(m_RCEnableRateControl) { @@ -3057,6 +3572,8 @@ void EncAppCfg::xPrintParameter() msg( DETAILS, "Max Num Merge Candidates : %d\n", m_maxNumMergeCand ); msg( DETAILS, "Max Num Affine Merge Candidates : %d\n", m_maxNumAffineMergeCand ); + msg( DETAILS, "Max Num Triangle Merge Candidates : %d\n", m_maxNumTriangleCand ); + msg( DETAILS, "Max Num IBC Merge Candidates : %d\n", m_maxNumIBCMergeCand ); msg( DETAILS, "\n"); msg( VERBOSE, "TOOL CFG: "); @@ -3080,85 +3597,67 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "TransformSkip:%d ", m_useTransformSkip ); msg( VERBOSE, "TransformSkipFast:%d ", m_useTransformSkipFast ); msg( VERBOSE, "TransformSkipLog2MaxSize:%d ", m_log2MaxTransformSkipBlockSize); - msg( VERBOSE, "Slice: M=%d ", int(m_sliceMode)); - if (m_sliceMode!=NO_SLICES) - { - msg( VERBOSE, "A=%d ", m_sliceArgument); - } -#if HEVC_DEPENDENT_SLICES - msg( VERBOSE, "SliceSegment: M=%d ",m_sliceSegmentMode); - if (m_sliceSegmentMode!=NO_SLICES) - { - msg( VERBOSE, "A=%d ", m_sliceSegmentArgument); - } -#endif - msg( VERBOSE, "Tiles:%dx%d ", m_numTileColumnsMinus1 + 1, m_numTileRowsMinus1 + 1 ); + msg(VERBOSE, "ChromaTS:%d ", m_useChromaTS); + msg( VERBOSE, "BDPCM:%d ", m_useBDPCM ); + msg( VERBOSE, "Tiles: %dx%d ", m_numTileCols, m_numTileRows ); + msg( VERBOSE, "Slices: %d ", m_numSlicesInPic); msg( VERBOSE, "MCTS:%d ", m_MCTSEncConstraint ); - msg( VERBOSE, "CIP:%d ", m_bUseConstrainedIntraPred); msg( VERBOSE, "SAO:%d ", (m_bUseSAO)?(1):(0)); msg( VERBOSE, "ALF:%d ", m_alf ? 1 : 0 ); - msg( VERBOSE, "PCM:%d ", (m_usePCM && (1<<m_uiPCMLog2MinSize) <= m_uiMaxCUWidth)? 
1 : 0); - - if (m_TransquantBypassEnabledFlag && m_CUTransquantBypassFlagForce) - { - msg( VERBOSE, "TransQuantBypassEnabled: =1"); - } - else - { - msg( VERBOSE, "TransQuantBypassEnabled:%d ", (m_TransquantBypassEnabledFlag)? 1:0 ); - } msg( VERBOSE, "WPP:%d ", (int)m_useWeightedPred); msg( VERBOSE, "WPB:%d ", (int)m_useWeightedBiPred); - msg( VERBOSE, "PME:%d ", m_log2ParallelMergeLevel); -#if HEVC_TILES_WPP const int iWaveFrontSubstreams = m_entropyCodingSyncEnabledFlag ? (m_iSourceHeight + m_uiMaxCUHeight - 1) / m_uiMaxCUHeight : 1; msg( VERBOSE, " WaveFrontSynchro:%d WaveFrontSubstreams:%d", m_entropyCodingSyncEnabledFlag?1:0, iWaveFrontSubstreams); -#endif -#if HEVC_USE_SCALING_LISTS msg( VERBOSE, " ScalingList:%d ", m_useScalingListId ); -#endif - msg( VERBOSE, "TMVPMode:%d ", m_TMVPModeId ); - + msg( VERBOSE, "TMVPMode:%d ", m_TMVPModeId ); msg( VERBOSE, " DQ:%d ", m_depQuantEnabledFlag); -#if HEVC_USE_SIGN_HIDING msg( VERBOSE, " SignBitHidingFlag:%d ", m_signDataHidingEnabledFlag); -#endif msg( VERBOSE, "RecalQP:%d ", m_recalculateQPAccordingToLambda ? 
1 : 0 ); - if( m_profile == Profile::NEXT ) { - msg( VERBOSE, "\nNEXT TOOL CFG: " ); + msg( VERBOSE, "\nTOOL CFG: " ); + msg( VERBOSE, "LFNST:%d ", m_LFNST ); + msg( VERBOSE, "MMVD:%d ", m_MMVD); msg( VERBOSE, "Affine:%d ", m_Affine ); if ( m_Affine ) { msg( VERBOSE, "AffineType:%d ", m_AffineType ); } + msg(VERBOSE, "PROF:%d ", m_PROF); msg(VERBOSE, "SubPuMvp:%d+%d ", m_SubPuMvpMode & 1, (m_SubPuMvpMode & 2) == 2); msg( VERBOSE, "DualITree:%d ", m_dualTree ); msg( VERBOSE, "IMV:%d ", m_ImvMode ); msg( VERBOSE, "BIO:%d ", m_BIO ); msg( VERBOSE, "LMChroma:%d ", m_LMChroma ); - if( m_LMChroma && m_chromaFormatIDC == CHROMA_420 ) - { - msg( VERBOSE, "CclmCollocatedChroma:%d ", m_cclmCollocatedChromaFlag ); - } + msg( VERBOSE, "HorCollocatedChroma:%d ", m_horCollocatedChromaFlag ); + msg( VERBOSE, "VerCollocatedChroma:%d ", m_verCollocatedChromaFlag ); msg( VERBOSE, "MTS: %1d(intra) %1d(inter) ", m_MTS & 1, ( m_MTS >> 1 ) & 1 ); msg( VERBOSE, "SBT:%d ", m_SBT ); + msg( VERBOSE, "ISP:%d ", m_ISP ); + msg( VERBOSE, "SMVD:%d ", m_SMVD ); msg( VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); - msg( VERBOSE, "GBi:%d ", m_GBi ); - msg( VERBOSE, "GBiFast:%d ", m_GBiFast ); + msg( VERBOSE, "Bcw:%d ", m_bcw ); + msg( VERBOSE, "BcwFast:%d ", m_BcwFast ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET msg( VERBOSE, "LADF:%d ", m_LadfEnabed ); #endif - msg(VERBOSE, "MHIntra:%d ", m_MHIntra); + msg(VERBOSE, "CIIP:%d ", m_ciip); msg( VERBOSE, "Triangle:%d ", m_Triangle ); - msg( VERBOSE, "AllowDisFracMMVD:%d ", m_allowDisFracMMVD ); + m_allowDisFracMMVD = m_MMVD ? m_allowDisFracMMVD : false; + if ( m_MMVD ) + msg(VERBOSE, "AllowDisFracMMVD:%d ", m_allowDisFracMMVD); msg( VERBOSE, "AffineAmvr:%d ", m_AffineAmvr ); m_AffineAmvrEncOpt = m_AffineAmvr ? 
m_AffineAmvrEncOpt : false; msg( VERBOSE, "AffineAmvrEncOpt:%d ", m_AffineAmvrEncOpt ); msg(VERBOSE, "DMVR:%d ", m_DMVR); + msg(VERBOSE, "MmvdDisNum:%d ", m_MmvdDisNum); + msg(VERBOSE, "JointCbCr:%d ", m_JointCbCrMode); } + m_useColorTrans = (m_chromaFormatIDC == CHROMA_444 && m_costMode != COST_LOSSLESS_CODING) ? m_useColorTrans : 0u; + msg(VERBOSE, "ACT:%d ", m_useColorTrans); + m_PLTMode = ( m_chromaFormatIDC == CHROMA_444) ? m_PLTMode : 0u; + msg(VERBOSE, "PLT:%d ", m_PLTMode); msg(VERBOSE, "IBC:%d ", m_IBCMode); msg( VERBOSE, "HashME:%d ", m_HashME ); msg( VERBOSE, "WrapAround:%d ", m_wrapAround); @@ -3167,12 +3666,32 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "WrapAroundOffset:%d ", m_wrapAroundOffset ); } // ADD_NEW_TOOL (add some output indicating the usage of tools) - msg(VERBOSE, "Reshape:%d ", m_lumaReshapeEnable); - if (m_lumaReshapeEnable) + msg(VERBOSE, "LoopFilterAcrossVirtualBoundaries:%d ", m_loopFilterAcrossVirtualBoundariesDisabledFlag); + if ( m_loopFilterAcrossVirtualBoundariesDisabledFlag ) + { + msg(VERBOSE, "vertical virtual boundaries:["); + for (unsigned i = 0; i < m_numVerVirtualBoundaries; i++) { - msg(VERBOSE, "(Sigal:%s ", m_reshapeSignalType==0? "SDR" : "HDR-PQ"); + msg(VERBOSE, " %d", m_virtualBoundariesPosX[i]); + } + msg(VERBOSE, " ] horizontal virtual boundaries:["); + for (unsigned i = 0; i < m_numHorVirtualBoundaries; i++) + { + msg(VERBOSE, " %d", m_virtualBoundariesPosY[i]); + } + msg(VERBOSE, " ] "); + } + msg(VERBOSE, "Reshape:%d ", m_lmcsEnabled); + if (m_lmcsEnabled) + { + msg(VERBOSE, "(Signal:%s ", m_reshapeSignalType == 0 ? "SDR" : (m_reshapeSignalType == 2 ? 
"HDR-HLG" : "HDR-PQ")); + msg(VERBOSE, "Opt:%d", m_adpOption); + if (m_adpOption > 0) { msg(VERBOSE, " CW:%d", m_initialCW); } + msg(VERBOSE, " CSoffset:%d", m_CSoffset); msg(VERBOSE, ") "); } + msg(VERBOSE, "MRL:%d ", m_MRL); + msg(VERBOSE, "MIP:%d ", m_MIP); msg(VERBOSE, "EncDbOpt:%d ", m_encDbOpt); msg( VERBOSE, "\nFAST TOOL CFG: " ); msg( VERBOSE, "LCTUFast:%d ", m_useFastLCTU ); @@ -3180,10 +3699,16 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "PBIntraFast:%d ", m_usePbIntraFast ); if( m_ImvMode ) msg( VERBOSE, "IMV4PelFast:%d ", m_Imv4PelFast ); if( m_MTS ) msg( VERBOSE, "MTSMaxCand: %1d(intra) %1d(inter) ", m_MTSIntraMaxCand, m_MTSInterMaxCand ); - msg( VERBOSE, "ISPFast:%d ", m_useFastISP ); + if( m_ISP ) msg( VERBOSE, "ISPFast:%d ", m_useFastISP ); + if( m_LFNST ) msg( VERBOSE, "FastLFNST:%d ", m_useFastLFNST ); msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT ); msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc ); msg( VERBOSE, "ContentBasedFastQtbt:%d ", m_contentBasedFastQtbt ); + msg( VERBOSE, "UseNonLinearAlfLuma:%d ", m_useNonLinearAlfLuma ); + msg( VERBOSE, "UseNonLinearAlfChroma:%d ", m_useNonLinearAlfChroma ); + msg( VERBOSE, "MaxNumAlfAlternativesChroma:%d ", m_maxNumAlfAlternativesChroma ); + if( m_MIP ) msg(VERBOSE, "FastMIP:%d ", m_useFastMIP); + msg( VERBOSE, "FastLocalDualTree:%d ", m_fastLocalDualTreeMode ); msg( VERBOSE, "NumSplitThreads:%d ", m_numSplitThreads ); if( m_numSplitThreads > 1 ) @@ -3193,6 +3718,15 @@ void EncAppCfg::xPrintParameter() msg( VERBOSE, "NumWppThreads:%d+%d ", m_numWppThreads, m_numWppExtraLines ); msg( VERBOSE, "EnsureWppBitEqual:%d ", m_ensureWppBitEqual ); + if( m_rprEnabled ) + { + msg( VERBOSE, "RPR:(%1.2lfx, %1.2lfx)|%d ", m_scalingRatioHor, m_scalingRatioVer, m_switchPocPeriod ); + } + else + { + msg( VERBOSE, "RPR:%d ", 0 ); + } + msg(VERBOSE, "TemporalFilter:%d ", m_gopBasedTemporalFilterEnabled); #if EXTENSION_360_VIDEO m_ext360.outputConfigurationSummary(); #endif @@ -3204,6 +3738,41 @@ void 
EncAppCfg::xPrintParameter() fflush( stdout ); } +bool EncAppCfg::xHasNonZeroTemporalID () +{ + for (unsigned int i = 0; i < m_iGOPSize; i++) + { + if ( m_GOPList[i].m_temporalId != 0 ) + { + return true; + } + } + return false; +} + +bool EncAppCfg::xHasLeadingPicture () +{ + for (unsigned int i = 0; i < m_iGOPSize; i++) + { + for ( unsigned int j = 0; j < m_GOPList[i].m_numRefPics0; j++) + { + if ( m_GOPList[i].m_deltaRefPics0[j] < 0 ) + { + return true; + } + } + for ( unsigned int j = 0; j < m_GOPList[i].m_numRefPics1; j++) + { + if ( m_GOPList[i].m_deltaRefPics1[j] < 0 ) + { + return true; + } + } + } + return false; +} + + bool confirmPara(bool bflag, const char* message) { if (!bflag) @@ -3215,4 +3784,6 @@ bool confirmPara(bool bflag, const char* message) return true; } + + //! \} diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 1debceb530f63da624cf1cbce902bb79306ef40b..4ce37e402f737e4f6cecb69d29b34f4c1b5b426d 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,10 +40,22 @@ #include "CommonLib/CommonDef.h" +#include <map> +template <class T1, class T2> +static inline std::istream& operator >> (std::istream &in, std::map<T1, T2> &map); + +#include "Utilities/program_options_lite.h" + #include "EncoderLib/EncCfg.h" #if EXTENSION_360_VIDEO #include "AppEncHelper360/TExt360AppEncCfg.h" #endif + +#if JVET_O0756_CALCULATE_HDRMETRICS +#include "HDRLib/inc/DistortionMetric.H" +#endif +namespace po = df::program_options_lite; + #include <sstream> #include <vector> //! 
\ingroup EncoderApp @@ -122,35 +134,50 @@ protected: uint32_t m_maxChromaFormatConstraintIdc; bool m_bFrameConstraintFlag; bool m_bNoQtbttDualTreeIntraConstraintFlag; + bool m_noPartitionConstraintsOverrideConstraintFlag; bool m_bNoSaoConstraintFlag; bool m_bNoAlfConstraintFlag; - bool m_bNoPcmConstraintFlag; bool m_bNoRefWraparoundConstraintFlag; bool m_bNoTemporalMvpConstraintFlag; bool m_bNoSbtmvpConstraintFlag; bool m_bNoAmvrConstraintFlag; bool m_bNoBdofConstraintFlag; + bool m_noDmvrConstraintFlag; bool m_bNoCclmConstraintFlag; bool m_bNoMtsConstraintFlag; + bool m_noSbtConstraintFlag; bool m_bNoAffineMotionConstraintFlag; - bool m_bNoGbiConstraintFlag; - bool m_bNoMhIntraConstraintFlag; + bool m_bNoBcwConstraintFlag; + bool m_noIbcConstraintFlag; + bool m_bNoCiipConstraintFlag; + bool m_noFPelMmvdConstraintFlag; bool m_bNoTriangleConstraintFlag; bool m_bNoLadfConstraintFlag; - bool m_bNoCurrPicRefConstraintFlag; + bool m_noTransformSkipConstraintFlag; + bool m_noBDPCMConstraintFlag; + bool m_noJointCbCrConstraintFlag; bool m_bNoQpDeltaConstraintFlag; bool m_bNoDepQuantConstraintFlag; bool m_bNoSignDataHidingConstraintFlag; + bool m_noTrailConstraintFlag; + bool m_noStsaConstraintFlag; + bool m_noRaslConstraintFlag; + bool m_noRadlConstraintFlag; + bool m_noIdrConstraintFlag; + bool m_noCraConstraintFlag; + bool m_noGdrConstraintFlag; + bool m_noApsConstraintFlag; // profile/level Profile::Name m_profile; Level::Tier m_levelTier; Level::Name m_level; + std::vector<uint32_t> m_subProfile; + uint8_t m_numSubProfile; + uint32_t m_bitDepthConstraint; ChromaFormat m_chromaFormatConstraint; bool m_intraConstraintFlag; - bool m_onePictureOnlyConstraintFlag; - bool m_lowerBitRateConstraintFlag; bool m_progressiveSourceFlag; bool m_interlacedSourceFlag; bool m_nonPackedConstraintFlag; @@ -160,10 +187,11 @@ protected: int m_iIntraPeriod; ///< period of I-slice (random access period) int m_iDecodingRefreshType; ///< random access type int m_iGOPSize; ///< GOP size of 
hierarchical structure -#if JCTVC_Y0038_PARAMS + int m_drapPeriod; ///< period of dependent RAP pictures bool m_rewriteParamSets; ///< Flag to enable rewriting of parameter sets at random access points -#endif - int m_extraRPSs; ///< extra RPSs added to handle CRA + RPLEntry m_RPLList0[MAX_GOP]; ///< the RPL entries from the config file + RPLEntry m_RPLList1[MAX_GOP]; ///< the RPL entries from the config file + bool m_idrRefParamList; ///< indicates if reference picture list syntax elements are present in slice headers of IDR pictures GOPEntry m_GOPList[MAX_GOP]; ///< the coding structure entries from the config file int m_numReorderPics[MAX_TLAYER]; ///< total number of reorder pictures int m_maxDecPicBuffering[MAX_TLAYER]; ///< total number of pictures in the decoded picture buffer @@ -172,12 +200,14 @@ protected: uint32_t m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE]; ///< number of bits for the upward bit shift operation on the decoded SAO offsets bool m_useTransformSkip; ///< flag for enabling intra transform skipping bool m_useTransformSkipFast; ///< flag for enabling fast intra transform skipping + int m_useBDPCM; uint32_t m_log2MaxTransformSkipBlockSize; ///< transform-skip maximum size (minimum of 2) bool m_transformSkipRotationEnabledFlag; ///< control flag for transform-skip/transquant-bypass residual rotation bool m_transformSkipContextEnabledFlag; ///< control flag for transform-skip/transquant-bypass single significance map context bool m_rdpcmEnabledFlag[NUMBER_OF_RDPCM_SIGNALLING_MODES];///< control flags for residual DPCM bool m_persistentRiceAdaptationEnabledFlag; ///< control flag for Golomb-Rice parameter adaptation over each slice bool m_cabacBypassAlignmentEnabledFlag; + bool m_ISP; bool m_useFastISP; ///< flag for enabling fast methods for ISP // coding quality @@ -187,6 +217,8 @@ protected: double m_fQP; ///< QP value of key-picture (floating point) #endif int m_iQP; ///< QP value of key-picture (integer) + bool 
m_useIdentityTableForNon420Chroma; + ChromaQpMappingTableParams m_chromaQpMappingTableParams; #if X0038_LAMBDA_FROM_QP_CAPABILITY int m_intraQPOffset; ///< QP offset for intra slice (integer) bool m_lambdaFromQPEnable; ///< enable flag for QP:lambda fix @@ -203,6 +235,8 @@ protected: int m_crQpOffset; ///< Chroma Cr QP Offset (0:default) int m_cbQpOffsetDualTree; ///< Chroma Cb QP Offset for dual tree (overwrite m_cbQpOffset for dual tree) int m_crQpOffsetDualTree; ///< Chroma Cr QP Offset for dual tree (overwrite m_crQpOffset for dual tree) + int m_cbCrQpOffset; ///< QP Offset for joint Cb-Cr mode + int m_cbCrQpOffsetDualTree; ///< QP Offset for joint Cb-Cr mode (overwrite m_cbCrQpOffset for dual tree) #if ER_CHROMA_QP_WCG_PPS WCGChromaQPControl m_wcgChromaQpControl; ///< Wide-colour-gamut chroma QP control. #endif @@ -225,27 +259,44 @@ protected: // coding unit (CU) definition unsigned m_uiCTUSize; + bool m_subPicPresentFlag; + unsigned m_numSubPics; + std::vector<uint32_t> m_subPicCtuTopLeftX; + std::vector<uint32_t> m_subPicCtuTopLeftY; + std::vector<uint32_t> m_subPicWidth; + std::vector<uint32_t> m_subPicHeight; + std::vector<uint32_t> m_subPicTreatedAsPicFlag; + std::vector<uint32_t> m_loopFilterAcrossSubpicEnabledFlag; + bool m_subPicIdPresentFlag; + bool m_subPicIdSignallingPresentFlag; + unsigned m_subPicIdLen; + std::vector<uint32_t> m_subPicId; bool m_SplitConsOverrideEnabledFlag; unsigned m_uiMinQT[3]; // 0: I slice luma; 1: P/B slice; 2: I slice chroma - unsigned m_uiMaxBTDepth; - unsigned m_uiMaxBTDepthI; - unsigned m_uiMaxBTDepthIChroma; + unsigned m_uiMaxMTTHierarchyDepth; + unsigned m_uiMaxMTTHierarchyDepthI; + unsigned m_uiMaxMTTHierarchyDepthIChroma; bool m_dualTree; + bool m_LFNST; + bool m_useFastLFNST; int m_SubPuMvpMode; bool m_Affine; bool m_AffineType; + bool m_PROF; bool m_BIO; int m_LMChroma; - bool m_cclmCollocatedChromaFlag; + bool m_horCollocatedChromaFlag; + bool m_verCollocatedChromaFlag; int m_MTS; ///< XZ: Multiple Transform Set 
int m_MTSIntraMaxCand; ///< XZ: Number of additional candidates to test int m_MTSInterMaxCand; ///< XZ: Number of additional candidates to test int m_MTSImplicit; bool m_SBT; ///< Sub-Block Transform for inter blocks - + int m_SBTFast64WidthTh; + bool m_SMVD; bool m_compositeRefEnabled; - bool m_GBi; - bool m_GBiFast; + bool m_bcw; + bool m_BcwFast; #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET bool m_LadfEnabed; int m_LadfNumIntervals; @@ -253,14 +304,20 @@ protected: int m_LadfIntervalLowerBound[MAX_LADF_INTERVALS]; #endif - bool m_MHIntra; + bool m_ciip; bool m_Triangle; bool m_HashME; bool m_allowDisFracMMVD; bool m_AffineAmvr; bool m_AffineAmvrEncOpt; bool m_DMVR; - + bool m_MMVD; + int m_MmvdDisNum; + bool m_rgbFormat; + bool m_useColorTrans; + unsigned m_PLTMode; + bool m_JointCbCrMode; + bool m_useChromaTS; unsigned m_IBCMode; unsigned m_IBCLocalSearchRangeX; unsigned m_IBCLocalSearchRangeY; @@ -273,10 +330,19 @@ protected: unsigned m_wrapAroundOffset; // ADD_NEW_TOOL : (encoder app) add tool enabling flags and associated parameters here - bool m_lumaReshapeEnable; + bool m_loopFilterAcrossVirtualBoundariesDisabledFlag; + unsigned m_numVerVirtualBoundaries; + unsigned m_numHorVirtualBoundaries; + std::vector<unsigned> m_virtualBoundariesPosX; + std::vector<unsigned> m_virtualBoundariesPosY; + bool m_lmcsEnabled; uint32_t m_reshapeSignalType; uint32_t m_intraCMD; ReshapeCW m_reshapeCW; + int m_updateCtrl; + int m_adpOption; + uint32_t m_initialCW; + int m_CSoffset; bool m_encDbOpt; unsigned m_uiMaxCUWidth; ///< max. CU width in pixel unsigned m_uiMaxCUHeight; ///< max. 
CU height in pixel @@ -290,6 +356,13 @@ protected: bool m_useFastMrg; bool m_e0023FastEnc; bool m_contentBasedFastQtbt; + bool m_useNonLinearAlfLuma; + bool m_useNonLinearAlfChroma; + unsigned m_maxNumAlfAlternativesChroma; + bool m_MRL; + bool m_MIP; + bool m_useFastMIP; + int m_fastLocalDualTreeMode; int m_numSplitThreads; @@ -298,9 +371,7 @@ protected: int m_numWppExtraLines; bool m_ensureWppBitEqual; -#if MAX_TB_SIZE_SIGNALLING int m_log2MaxTbSize; -#endif // coding tools (bit-depth) int m_inputBitDepth [MAX_NUM_CHANNEL_TYPE]; ///< bit-depth of input file int m_outputBitDepth [MAX_NUM_CHANNEL_TYPE]; ///< bit-depth of output file @@ -312,8 +383,6 @@ protected: //coding tools (chroma format) ChromaFormat m_chromaFormatIDC; - // coding tools (PCM bit-depth) - bool m_bPCMInputBitDepthFlag; ///< 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth. // coding tool (SAO) bool m_bUseSAO; @@ -322,9 +391,7 @@ protected: double m_saoEncodingRateChroma; ///< The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma. 
int m_maxNumOffsetsPerPic; ///< SAO maximun number of offset per picture bool m_saoCtuBoundary; ///< SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas -#if K0238_SAO_GREEDY_MERGE_ENCODING bool m_saoGreedyMergeEnc; ///< SAO greedy merge encoding algorithm -#endif // coding tools (loop filter) bool m_bLoopFilterDisable; ///< flag for using deblocking filter bool m_loopFilterOffsetInPPS; ///< offset for deblocking filter in 0 = slice header, 1 = PPS @@ -335,11 +402,6 @@ protected: #else bool m_DeblockingFilterMetric; ///< blockiness metric in encoder #endif - // coding tools (PCM) - bool m_usePCM; ///< flag for using IPCM - uint32_t m_pcmLog2MaxSize; ///< log2 of maximum PCM block size - uint32_t m_uiPCMLog2MinSize; ///< log2 of minimum PCM block size - bool m_bPCMFilterDisableFlag; ///< PCM filter disable flag bool m_enableIntraReferenceSmoothing; ///< flag for enabling(default)/disabling intra reference smoothing/filtering // coding tools (encoder-only parameters) @@ -364,99 +426,153 @@ protected: bool m_useFastDecisionForMerge; ///< flag for using Fast Decision Merge RD-Cost bool m_bUseCbfFastMode; ///< flag for using Cbf Fast PU Mode Decision bool m_useEarlySkipDetection; ///< flag for using Early SKIP Detection - SliceConstraint m_sliceMode; - int m_sliceArgument; ///< argument according to selected slice mode -#if HEVC_DEPENDENT_SLICES - SliceConstraint m_sliceSegmentMode; - int m_sliceSegmentArgument; ///< argument according to selected slice segment mode -#endif - - bool m_bLFCrossSliceBoundaryFlag; ///< 1: filter across slice boundaries 0: do not filter across slice boundaries -#if HEVC_TILES_WPP - bool m_bLFCrossTileBoundaryFlag; ///< 1: filter across tile boundaries 0: do not filter across tile boundaries - bool m_tileUniformSpacingFlag; - int m_numTileColumnsMinus1; - int m_numTileRowsMinus1; - std::vector<int> m_tileColumnWidth; - std::vector<int> m_tileRowHeight; + bool m_picPartitionFlag; ///< enable picture 
partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used) + std::vector<uint32_t> m_tileColumnWidth; ///< tile column widths in units of CTUs (last column width will be repeated uniformly to cover any remaining picture width) + std::vector<uint32_t> m_tileRowHeight; ///< tile row heights in units of CTUs (last row height will be repeated uniformly to cover any remaining picture height) + bool m_rasterSliceFlag; ///< indicates if using raster-scan or rectangular slices (0: rectangular, 1: raster-scan) + std::vector<uint32_t> m_rectSlicePos; ///< rectangular slice positions (pairs of top-left CTU address followed by bottom-right CTU address) + int m_rectSliceFixedWidth; ///< fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead) + int m_rectSliceFixedHeight; ///< fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead) + std::vector<uint32_t> m_rasterSliceSize; ///< raster-scan slice sizes in units of tiles (last size will be repeated uniformly to cover any remaining tiles in the picture) + bool m_disableLFCrossTileBoundaryFlag; ///< 0: filter across tile boundaries 1: do not filter across tile boundaries + bool m_disableLFCrossSliceBoundaryFlag; ///< 0: filter across slice boundaries 1: do not filter across slice boundaries + uint32_t m_numSlicesInPic; ///< derived number of rectangular slices in the picture (raster-scan slice specified at slice level) + bool m_tileIdxDeltaPresentFlag; ///< derived tile index delta present flag + std::vector<RectSlice> m_rectSlices; ///< derived list of rectangular slice signalling parameters + uint32_t m_numTileCols; ///< derived number of tile columns + uint32_t m_numTileRows; ///< derived number of tile rows + bool m_subPicPartitionFlag; + bool m_singleSlicePerSubPicFlag; bool m_entropyCodingSyncEnabledFlag; -#endif - bool m_bUseConstrainedIntraPred; ///< flag for using constrained intra prediction + 
bool m_bFastUDIUseMPMEnabled; bool m_bFastMEForGenBLowDelayEnabled; bool m_bUseBLambdaForNonKeyLowDelayPictures; HashType m_decodedPictureHashSEIType; ///< Checksum mode for decoded picture hash SEI message +#if HEVC_SEI bool m_recoveryPointSEIEnabled; +#endif bool m_bufferingPeriodSEIEnabled; bool m_pictureTimingSEIEnabled; - bool m_toneMappingInfoSEIEnabled; - bool m_chromaResamplingFilterSEIenabled; - int m_chromaResamplingHorFilterIdc; - int m_chromaResamplingVerFilterIdc; - int m_toneMapId; - bool m_toneMapCancelFlag; - bool m_toneMapPersistenceFlag; - int m_toneMapCodedDataBitDepth; - int m_toneMapTargetBitDepth; - int m_toneMapModelId; - int m_toneMapMinValue; - int m_toneMapMaxValue; - int m_sigmoidMidpoint; - int m_sigmoidWidth; - int m_numPivots; - int m_cameraIsoSpeedIdc; - int m_cameraIsoSpeedValue; - int m_exposureIndexIdc; - int m_exposureIndexValue; - bool m_exposureCompensationValueSignFlag; - int m_exposureCompensationValueNumerator; - int m_exposureCompensationValueDenomIdc; - int m_refScreenLuminanceWhite; - int m_extendedRangeWhiteLevel; - int m_nominalBlackLevelLumaCodeValue; - int m_nominalWhiteLevelLumaCodeValue; - int m_extendedWhiteLevelLumaCodeValue; - int* m_startOfCodedInterval; - int* m_codedPivotValue; - int* m_targetPivotValue; + bool m_bpDeltasGOPStructure; + bool m_decodingUnitInfoSEIEnabled; + bool m_frameFieldInfoSEIEnabled; bool m_framePackingSEIEnabled; int m_framePackingSEIType; int m_framePackingSEIId; int m_framePackingSEIQuincunx; int m_framePackingSEIInterpretation; - bool m_segmentedRectFramePackingSEIEnabled; - bool m_segmentedRectFramePackingSEICancel; - int m_segmentedRectFramePackingSEIType; - bool m_segmentedRectFramePackingSEIPersistence; - int m_displayOrientationSEIAngle; - bool m_temporalLevel0IndexSEIEnabled; - bool m_gradualDecodingRefreshInfoEnabled; - int m_noDisplaySEITLayer; - bool m_decodingUnitInfoSEIEnabled; - bool m_SOPDescriptionSEIEnabled; - bool m_scalableNestingSEIEnabled; - bool m_tmctsSEIEnabled; - 
bool m_timeCodeSEIEnabled; - int m_timeCodeSEINumTs; - SEITimeSet m_timeSetArray[MAX_TIMECODE_SEI_SETS]; - bool m_kneeSEIEnabled; - int m_kneeSEIId; - bool m_kneeSEICancelFlag; - bool m_kneeSEIPersistenceFlag; - int m_kneeSEIInputDrange; - int m_kneeSEIInputDispLuminance; - int m_kneeSEIOutputDrange; - int m_kneeSEIOutputDispLuminance; - int m_kneeSEINumKneePointsMinus1; - int* m_kneeSEIInputKneePoint; - int* m_kneeSEIOutputKneePoint; #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI int m_preferredTransferCharacteristics; #endif - uint32_t m_greenMetadataType; - uint32_t m_xsdMetricType; + // film grain characterstics sei + bool m_fgcSEIEnabled; + bool m_fgcSEICancelFlag; + bool m_fgcSEIPersistenceFlag; + uint32_t m_fgcSEIModelID; + bool m_fgcSEISepColourDescPresentFlag; + uint32_t m_fgcSEIBlendingModeID; + uint32_t m_fgcSEILog2ScaleFactor; + bool m_fgcSEICompModelPresent[MAX_NUM_COMPONENT]; + // content light level SEI + bool m_cllSEIEnabled; + uint32_t m_cllSEIMaxContentLevel; + uint32_t m_cllSEIMaxPicAvgLevel; + // ambient viewing environment sei + bool m_aveSEIEnabled; + uint32_t m_aveSEIAmbientIlluminance; + uint32_t m_aveSEIAmbientLightX; + uint32_t m_aveSEIAmbientLightY; + // content colour volume sei + bool m_ccvSEIEnabled; + bool m_ccvSEICancelFlag; + bool m_ccvSEIPersistenceFlag; + bool m_ccvSEIPrimariesPresentFlag; + bool m_ccvSEIMinLuminanceValuePresentFlag; + bool m_ccvSEIMaxLuminanceValuePresentFlag; + bool m_ccvSEIAvgLuminanceValuePresentFlag; + double m_ccvSEIPrimariesX[MAX_NUM_COMPONENT]; + double m_ccvSEIPrimariesY[MAX_NUM_COMPONENT]; + double m_ccvSEIMinLuminanceValue; + double m_ccvSEIMaxLuminanceValue; + double m_ccvSEIAvgLuminanceValue; + + bool m_erpSEIEnabled; + bool m_erpSEICancelFlag; + bool m_erpSEIPersistenceFlag; + bool m_erpSEIGuardBandFlag; + uint32_t m_erpSEIGuardBandType; + uint32_t m_erpSEILeftGuardBandWidth; + uint32_t m_erpSEIRightGuardBandWidth; + + bool m_sphereRotationSEIEnabled; + bool m_sphereRotationSEICancelFlag; + bool 
m_sphereRotationSEIPersistenceFlag; + int m_sphereRotationSEIYaw; + int m_sphereRotationSEIPitch; + int m_sphereRotationSEIRoll; + + bool m_omniViewportSEIEnabled; + uint32_t m_omniViewportSEIId; + bool m_omniViewportSEICancelFlag; + bool m_omniViewportSEIPersistenceFlag; + uint32_t m_omniViewportSEICntMinus1; + std::vector<int> m_omniViewportSEIAzimuthCentre; + std::vector<int> m_omniViewportSEIElevationCentre; + std::vector<int> m_omniViewportSEITiltCentre; + std::vector<uint32_t> m_omniViewportSEIHorRange; + std::vector<uint32_t> m_omniViewportSEIVerRange; + bool m_rwpSEIEnabled; + bool m_rwpSEIRwpCancelFlag; + bool m_rwpSEIRwpPersistenceFlag; + bool m_rwpSEIConstituentPictureMatchingFlag; + int m_rwpSEINumPackedRegions; + int m_rwpSEIProjPictureWidth; + int m_rwpSEIProjPictureHeight; + int m_rwpSEIPackedPictureWidth; + int m_rwpSEIPackedPictureHeight; + std::vector<uint8_t> m_rwpSEIRwpTransformType; + std::vector<bool> m_rwpSEIRwpGuardBandFlag; + std::vector<uint32_t> m_rwpSEIProjRegionWidth; + std::vector<uint32_t> m_rwpSEIProjRegionHeight; + std::vector<uint32_t> m_rwpSEIRwpSEIProjRegionTop; + std::vector<uint32_t> m_rwpSEIProjRegionLeft; + std::vector<uint16_t> m_rwpSEIPackedRegionWidth; + std::vector<uint16_t> m_rwpSEIPackedRegionHeight; + std::vector<uint16_t> m_rwpSEIPackedRegionTop; + std::vector<uint16_t> m_rwpSEIPackedRegionLeft; + std::vector<uint8_t> m_rwpSEIRwpLeftGuardBandWidth; + std::vector<uint8_t> m_rwpSEIRwpRightGuardBandWidth; + std::vector<uint8_t> m_rwpSEIRwpTopGuardBandHeight; + std::vector<uint8_t> m_rwpSEIRwpBottomGuardBandHeight; + std::vector<bool> m_rwpSEIRwpGuardBandNotUsedForPredFlag; + std::vector<uint8_t> m_rwpSEIRwpGuardBandType; + + bool m_gcmpSEIEnabled; + bool m_gcmpSEICancelFlag; + bool m_gcmpSEIPersistenceFlag; + uint32_t m_gcmpSEIPackingType; + uint32_t m_gcmpSEIMappingFunctionType; + std::vector<uint8_t> m_gcmpSEIFaceIndex; + std::vector<uint8_t> m_gcmpSEIFaceRotation; + std::vector<double> m_gcmpSEIFunctionCoeffU; + 
std::vector<bool> m_gcmpSEIFunctionUAffectedByVFlag; + std::vector<double> m_gcmpSEIFunctionCoeffV; + std::vector<bool> m_gcmpSEIFunctionVAffectedByUFlag; + bool m_gcmpSEIGuardBandFlag; + bool m_gcmpSEIGuardBandBoundaryType; + uint32_t m_gcmpSEIGuardBandSamplesMinus1; + + bool m_subpicureLevelInfoSEIEnabled; + + bool m_sampleAspectRatioInfoSEIEnabled; + bool m_sariCancelFlag; + bool m_sariPersistenceFlag; + int m_sariAspectRatioIdc; + int m_sariSarWidth; + int m_sariSarHeight; bool m_MCTSEncConstraint; @@ -465,15 +581,27 @@ protected: bool m_useWeightedBiPred; ///< Use of bi-directional weighted prediction in B slices WeightedPredictionMethod m_weightedPredictionMethod; - uint32_t m_log2ParallelMergeLevel; ///< Parallel merge estimation region uint32_t m_maxNumMergeCand; ///< Max number of merge candidates uint32_t m_maxNumAffineMergeCand; ///< Max number of affine merge candidates + uint32_t m_maxNumTriangleCand; + uint32_t m_maxNumIBCMergeCand; ///< Max number of IBC merge candidates + bool m_sliceLevelRpl; ///< code reference picture lists in slice headers rather than picture header + bool m_sliceLevelDblk; ///< code deblocking filter parameters in slice headers rather than picture header + bool m_sliceLevelSao; ///< code SAO parameters in slice headers rather than picture header + bool m_sliceLevelAlf; ///< code ALF parameters in slice headers rather than picture header int m_TMVPModeId; + int m_PPSorSliceMode; + bool m_constantSliceHeaderParamsEnabledFlag; + int m_PPSDepQuantEnabledIdc; + int m_PPSRefPicListSPSIdc0; + int m_PPSRefPicListSPSIdc1; + int m_PPSMvdL1ZeroIdc; + int m_PPSCollocatedFromL0Idc; + uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; + uint32_t m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; bool m_depQuantEnabledFlag; -#if HEVC_USE_SIGN_HIDING bool m_signDataHidingEnabledFlag; -#endif bool m_RCEnableRateControl; ///< enable rate control or not int m_RCTargetBitrate; ///< target bitrate when rate control is enabled int m_RCKeepHierarchicalBit; 
///< 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation @@ -486,30 +614,23 @@ protected: uint32_t m_RCCpbSize; ///< CPB size double m_RCInitialCpbFullness; ///< initial CPB fullness #endif -#if HEVC_USE_SCALING_LISTS ScalingListMode m_useScalingListId; ///< using quantization matrix std::string m_scalingListFileName; ///< quantization matrix file name -#endif - bool m_TransquantBypassEnabledFlag; ///< transquant_bypass_enabled_flag setting in PPS. - bool m_CUTransquantBypassFlagForce; ///< if transquant_bypass_enabled_flag, then, if true, all CU transquant bypass flags will be set to true. + bool m_disableScalingMatrixForLfnstBlks; CostMode m_costMode; ///< Cost mode to use bool m_recalculateQPAccordingToLambda; ///< recalculate QP value according to the lambda value -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - bool m_useStrongIntraSmoothing; ///< enable strong intra smoothing for 32x32 blocks where the reference samples are flat -#endif +#if HEVC_SEI int m_activeParameterSetsSEIEnabled; +#endif + bool m_decodingParameterSetEnabled; ///< enable decoding parameter set + bool m_hrdParametersPresentFlag; ///< enable generation of HRD parameters bool m_vuiParametersPresentFlag; ///< enable generation of VUI parameters bool m_aspectRatioInfoPresentFlag; ///< Signals whether aspect_ratio_idc is present int m_aspectRatioIdc; ///< aspect_ratio_idc int m_sarWidth; ///< horizontal size of the sample aspect ratio int m_sarHeight; ///< vertical size of the sample aspect ratio - bool m_overscanInfoPresentFlag; ///< Signals whether overscan_appropriate_flag is present - bool m_overscanAppropriateFlag; ///< Indicates whether conformant decoded pictures are suitable for display using overscan - bool m_videoSignalTypePresentFlag; ///< Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present - int m_videoFormat; ///< Indicates representation of pictures - bool 
m_videoFullRangeFlag; ///< Indicates the black level and range of luma and chroma signals bool m_colourDescriptionPresentFlag; ///< Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present int m_colourPrimaries; ///< Indicates chromaticity coordinates of the source primaries int m_transferCharacteristics; ///< Indicates the opto-electronic transfer characteristics of the source @@ -517,28 +638,15 @@ protected: bool m_chromaLocInfoPresentFlag; ///< Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present int m_chromaSampleLocTypeTopField; ///< Specifies the location of chroma samples for top field int m_chromaSampleLocTypeBottomField; ///< Specifies the location of chroma samples for bottom field - bool m_neutralChromaIndicationFlag; ///< Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1) - bool m_defaultDisplayWindowFlag; ///< Indicates the presence of the default window parameters - int m_defDispWinLeftOffset; ///< Specifies the left offset from the conformance window of the default window - int m_defDispWinRightOffset; ///< Specifies the right offset from the conformance window of the default window - int m_defDispWinTopOffset; ///< Specifies the top offset from the conformance window of the default window - int m_defDispWinBottomOffset; ///< Specifies the bottom offset from the conformance window of the default window - bool m_frameFieldInfoPresentFlag; ///< Indicates that pic_struct values are present in picture timing SEI messages - bool m_pocProportionalToTimingFlag; ///< Indicates that the POC value is proportional to the output time w.r.t. 
first picture in CVS - int m_numTicksPocDiffOneMinus1; ///< Number of ticks minus 1 that for a POC difference of one - bool m_bitstreamRestrictionFlag; ///< Signals whether bitstream restriction parameters are present -#if HEVC_TILES_WPP - bool m_tilesFixedStructureFlag; ///< Indicates that each active picture parameter set has the same values of the syntax elements related to tiles -#endif - bool m_motionVectorsOverPicBoundariesFlag; ///< Indicates that no samples outside the picture boundaries are used for inter prediction - int m_minSpatialSegmentationIdc; ///< Indicates the maximum size of the spatial segments in the pictures in the coded video sequence - int m_maxBytesPerPicDenom; ///< Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture - int m_maxBitsPerMinCuDenom; ///< Indicates an upper bound for the number of bits of coding_unit() data - int m_log2MaxMvLengthHorizontal; ///< Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units - int m_log2MaxMvLengthVertical; ///< Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units + int m_chromaSampleLocType; ///< Specifies the location of chroma samples for progressive content + bool m_overscanInfoPresentFlag; ///< Signals whether overscan_appropriate_flag is present + bool m_overscanAppropriateFlag; ///< Indicates whether conformant decoded pictures are suitable for display using overscan + bool m_videoFullRangeFlag; ///< Indicates the black level and range of luma and chroma signals int m_ImvMode; ///< imv mode int m_Imv4PelFast; ///< imv 4-Pel fast mode +#if HEVC_SEI std::string m_colourRemapSEIFileRoot; +#endif std::string m_summaryOutFilename; ///< filename to use for producing summary output file. std::string m_summaryPicFilenameBase; ///< Base filename to use for producing summary picture output files. 
The actual filenames used will have I.txt, P.txt and B.txt appended. @@ -555,7 +663,32 @@ protected: bool m_bs2ModPOCAndType; bool m_forceDecodeBitstream1; - bool m_alf; ///> Adaptive Loop Filter + bool m_alf; ///< Adaptive Loop Filter + + double m_scalingRatioHor; + double m_scalingRatioVer; + bool m_rprEnabled; + double m_fractionOfFrames; ///< encode a fraction of the frames as specified in FramesToBeEncoded + int m_switchPocPeriod; + int m_upscaledOutput; ////< Output upscaled (2), decoded cropped but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR. + + bool m_gopBasedTemporalFilterEnabled; ///< GOP-based Temporal Filter enable/disable + bool m_gopBasedTemporalFilterFutureReference; ///< Enable/disable future frame references in the GOP-based Temporal Filter + std::map<int, double> m_gopBasedTemporalFilterStrengths; ///< Filter strength per frame for the GOP-based Temporal Filter + + int m_maxLayers; + + int m_layerId[MAX_VPS_LAYERS]; + int m_layerIdx; + int m_maxSublayers; + bool m_allLayersSameNumSublayersFlag; + bool m_allIndependentLayersFlag; + int m_numRefLayers[MAX_VPS_LAYERS]; + std::string m_refLayerIdxStr[MAX_VPS_LAYERS]; + bool m_eachLayerIsAnOlsFlag; + int m_olsModeIdc; + int m_numOutputLayerSets; + std::string m_olsOutputLayerStr[MAX_VPS_LAYERS]; #if EXTENSION_360_VIDEO TExt360AppEncCfg m_ext360; @@ -563,11 +696,32 @@ protected: friend class TExt360AppEncTop; #endif +#if JVET_O0756_CONFIG_HDRMETRICS || JVET_O0756_CALCULATE_HDRMETRICS +#if JVET_O0756_CALCULATE_HDRMETRICS + double m_whitePointDeltaE[hdrtoolslib::NB_REF_WHITE]; +#else + double m_whitePointDeltaE[3]; +#endif + double m_maxSampleValue; + int m_sampleRange; + int m_colorPrimaries; + bool m_enableTFunctionLUT; + int m_chromaLocation; + int m_chromaUPFilter; + int m_cropOffsetLeft; + int m_cropOffsetTop; + int m_cropOffsetRight; + int m_cropOffsetBottom; + bool m_calculateHdrMetrics; +#endif // internal member functions bool xCheckParameter (); ///< check 
validity of configuration values void xPrintParameter (); ///< print configuration values void xPrintUsage (); ///< print usage + bool xHasNonZeroTemporalID(); ///< check presence of constant temporal ID in GOP structure + bool xHasLeadingPicture(); ///< check presence of leading pictures in GOP structure + int xAutoDetermineProfile(); ///< auto determine the profile to use given the other configuration settings. Returns 1 if erred. Can select profile 'NONE' public: EncAppCfg(); virtual ~EncAppCfg(); diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp index 444c6aff32bce814cc0e227a38a7d60eeff4acc3..1b752b0c1483df3d535d5ebadd53b5e206c09c4c 100644 --- a/source/App/EncoderApp/encmain.cpp +++ b/source/App/EncoderApp/encmain.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ #include <chrono> #include <ctime> +#include "EncoderLib/EncLibCommon.h" #include "EncApp.h" #include "Utilities/program_options_lite.h" @@ -104,10 +105,7 @@ int main(int argc, char* argv[]) #if ENABLE_SPLIT_PARALLELISM fprintf( stdout, "[SPLIT_PARALLEL (%d jobs)]", PARL_SPLIT_MAX_NUM_JOBS ); #endif -#if ENABLE_WPP_PARALLELISM - fprintf( stdout, "[WPP_PARALLEL]" ); -#endif -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM const char* waitPolicy = getenv( "OMP_WAIT_POLICY" ); const char* maxThLim = getenv( "OMP_THREAD_LIMIT" ); fprintf( stdout, waitPolicy ? 
"[OMP: WAIT_POLICY=%s," : "[OMP: WAIT_POLICY=,", waitPolicy ); @@ -116,24 +114,72 @@ int main(int argc, char* argv[]) #endif fprintf( stdout, "\n" ); - EncApp* pcEncApp = new EncApp; - // create application encoder class - pcEncApp->create(); + std::fstream bitstream; + EncLibCommon encLibCommon; + + std::vector<EncApp*> pcEncApp(1); + bool resized = false; + int layerIdx = 0; - // parse configuration - try + initROM(); + TComHash::initBlockSizeToIndex(); + + char** layerArgv = new char*[argc]; + + do { - if(!pcEncApp->parseCfg( argc, argv )) + pcEncApp[layerIdx] = new EncApp( bitstream, &encLibCommon ); + // create application encoder class per layer + pcEncApp[layerIdx]->create(); + + // parse configuration per layer + try { - pcEncApp->destroy(); + int j = 0; + for( int i = 0; i < argc; i++ ) + { + if( argv[i][0] == '-' && argv[i][1] == 'l' ) + { + if( argv[i][2] == std::to_string( layerIdx ).c_str()[0] ) + { + layerArgv[j] = argv[i + 1]; + layerArgv[j + 1] = argv[i + 2]; + j += 2; + } + i += 2; + } + else + { + layerArgv[j] = argv[i]; + j++; + } + } + + if( !pcEncApp[layerIdx]->parseCfg( j, layerArgv ) ) + { + pcEncApp[layerIdx]->destroy(); + return 1; + } + } + catch( df::program_options_lite::ParseFailure &e ) + { + std::cerr << "Error parsing option \"" << e.arg << "\" with argument \"" << e.val << "\"." << std::endl; return 1; } - } - catch (df::program_options_lite::ParseFailure &e) - { - std::cerr << "Error parsing option \""<< e.arg <<"\" with argument \""<< e.val <<"\"." 
<< std::endl; - return 1; - } + + int layerId = layerIdx; //VS: layerIdx should be converted to layerId after VPS is implemented + pcEncApp[layerIdx]->createLib( layerId ); + + if( !resized ) + { + pcEncApp.resize( pcEncApp[layerIdx]->getMaxLayers() ); + resized = true; + } + + layerIdx++; + } while( layerIdx < pcEncApp.size() ); + + delete[] layerArgv; #if PRINT_MACRO_VALUES printMacroSettings(); @@ -145,40 +191,111 @@ int main(int argc, char* argv[]) fprintf(stdout, " started @ %s", std::ctime(&startTime2) ); clock_t startClock = clock(); - // call encoding function -#ifndef _DEBUG - try + // call encoding function per layer + bool eos = false; + + while( !eos ) { + // read GOP + bool keepLoop = true; + while( keepLoop ) + { + for( auto & encApp : pcEncApp ) + { +#ifndef _DEBUG + try + { #endif - pcEncApp->encode(); + keepLoop = encApp->encodePrep( eos ); #ifndef _DEBUG - } - catch( Exception &e ) - { - std::cerr << e.what() << std::endl; - return 1; - } - catch( ... ) - { - std::cerr << "Unspecified error occurred" << std::endl; - return 1; - } + } + catch( Exception &e ) + { + std::cerr << e.what() << std::endl; + return EXIT_FAILURE; + } + catch( const std::bad_alloc &e ) + { + std::cout << "Memory allocation failed: " << e.what() << std::endl; + return EXIT_FAILURE; + } +#endif + } + } + + // encode GOP + keepLoop = true; + while( keepLoop ) + { + for( auto & encApp : pcEncApp ) + { +#ifndef _DEBUG + try + { +#endif + keepLoop = encApp->encode(); +#ifndef _DEBUG + } + catch( Exception &e ) + { + std::cerr << e.what() << std::endl; + return EXIT_FAILURE; + } + catch( const std::bad_alloc &e ) + { + std::cout << "Memory allocation failed: " << e.what() << std::endl; + return EXIT_FAILURE; + } #endif + } + } + } // ending time clock_t endClock = clock(); auto endTime = std::chrono::steady_clock::now(); std::time_t endTime2 = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - auto encTime = 
std::chrono::duration_cast<std::chrono::milliseconds>( endTime- startTime ).count(); - // destroy application encoder class - pcEncApp->destroy(); +#if JVET_O0756_CALCULATE_HDRMETRICS + auto metricTime = pcEncApp[0]->getMetricTime(); - delete pcEncApp; + for( int layerIdx = 1; layerIdx < pcEncApp.size(); layerIdx++ ) + { + metricTime += pcEncApp[layerIdx]->getMetricTime(); + } + auto totalTime = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime ).count(); + auto encTime = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime - metricTime ).count(); + auto metricTimeuser = std::chrono::duration_cast<std::chrono::milliseconds>( metricTime ).count(); +#else + auto encTime = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime).count(); +#endif + + for( auto & encApp : pcEncApp ) + { + encApp->destroyLib(); + + // destroy application encoder class per layer + encApp->destroy(); + + delete encApp; + } + + // destroy ROM + destroyROM(); + + pcEncApp.clear(); printf( "\n finished @ %s", std::ctime(&endTime2) ); +#if JVET_O0756_CALCULATE_HDRMETRICS + printf(" Encoding Time (Total Time): %12.3f ( %12.3f ) sec. [user] %12.3f ( %12.3f ) sec. [elapsed]\n", + ((endClock - startClock) * 1.0 / CLOCKS_PER_SEC) - (metricTimeuser/1000.0), + (endClock - startClock) * 1.0 / CLOCKS_PER_SEC, + encTime / 1000.0, + totalTime / 1000.0); +#else printf(" Total Time: %12.3f sec. [user] %12.3f sec. 
[elapsed]\n", (endClock - startClock) * 1.0 / CLOCKS_PER_SEC, encTime / 1000.0); +#endif return 0; } diff --git a/source/App/Parcat/CMakeLists.txt b/source/App/Parcat/CMakeLists.txt index 55b144cc0671b5c2bd0e8c279bf689aabe2f7abf..12edc317ad223c8274d0234332fe9b2edf109db7 100644 --- a/source/App/Parcat/CMakeLists.txt +++ b/source/App/Parcat/CMakeLists.txt @@ -10,12 +10,12 @@ file( GLOB INC_FILES "*.h" ) # add executable add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ) -target_link_libraries( ${EXE_NAME} Threads::Threads ${ADDITIONAL_LIBS} ) +target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} ) # include the output directory, where the svnrevision.h file is generated include_directories(${CMAKE_CURRENT_BINARY_DIR}) -include_directories(${CMAKE_SOURCE_DIR}/source/Lib/CommonLib) +include_directories(${CMAKE_SOURCE_DIR}/source/Lib) if( CMAKE_SYSTEM_NAME STREQUAL "Linux" ) add_custom_command( TARGET ${EXE_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/source/App/Parcat/parcat.cpp b/source/App/Parcat/parcat.cpp index d85c356d804143423a7cce3315fd047bfb3f5313..03997c6004a81b63899c7fdb60fcbdb3649e7701 100644 --- a/source/App/Parcat/parcat.cpp +++ b/source/App/Parcat/parcat.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -36,10 +36,26 @@ #include <cstdlib> #include <cstdio> #include <cassert> -#include "CommonDef.h" +#include "CommonLib/CommonDef.h" +#include "DecoderLib/NALread.h" +#include "VLCReader.h" +#if ENABLE_TRACING +#include "CommonLib/dtrace_next.h" +#endif + +#define PRINT_NALUS 1 -#define PRINT_NALUS 0 +class ParcatHLSyntaxReader : public VLCReader +{ + public: + void parseSliceHeaderUpToPoc ( ParameterSetManager *parameterSetManager ); +}; +void ParcatHLSyntaxReader::parseSliceHeaderUpToPoc ( ParameterSetManager *parameterSetManager ) +{ + // POC is first syntax element in slice header + return; +} /** Find the beginning and end of a NAL (Network Abstraction Layer) unit in a byte buffer containing H264 bitstream data. @@ -102,56 +118,6 @@ const bool verbose = false; const char * NALU_TYPE[] = { -#if !JVET_M0101_HLS - "TRAIL_N", - "TRAIL_R", - "TSA_N", - "TSA_R", - "STSA_N", - "STSA_R", - "RADL_N", - "RADL_R", - "RASL_N", - "RASL_R", - "RSV_VCL_N10", - "RSV_VCL_N12", - "RSV_VCL_N14", - "RSV_VCL_R11", - "RSV_VCL_R13", - "RSV_VCL_R15", - "BLA_W_LP", - "BLA_W_RADL", - "BLA_N_LP", - "IDR_W_RADL", - "IDR_N_LP", - "CRA_NUT", - "RSV_IRAP_VCL22", - "RSV_IRAP_VCL23", - "unk", - "unk", - "unk", - "unk", - "unk", - "unk", - "unk", - "unk", -#if HEVC_VPS - "VPS_NUT", -#else - "unk", -#endif - "SPS_NUT", - "PPS_NUT", -#if JVET_M0132 - "APS_NUT", -#endif - "AUD_NUT", - "EOS_NUT", - "EOB_NUT", - "FD_NUT", - "PREFIX_SEI_NUT", - "SUFFIX_SEI_NUT", -#else "NAL_UNIT_CODED_SLICE_TRAIL", "NAL_UNIT_CODED_SLICE_STSA", "NAL_UNIT_CODED_SLICE_RADL", @@ -159,43 +125,31 @@ const char * NALU_TYPE[] = "NAL_UNIT_RESERVED_VCL_4", "NAL_UNIT_RESERVED_VCL_5", "NAL_UNIT_RESERVED_VCL_6", - "NAL_UNIT_RESERVED_VCL_7", - "NAL_UNIT_CODED_SLICE_IDR_W_RADL", "NAL_UNIT_CODED_SLICE_IDR_N_LP", "NAL_UNIT_CODED_SLICE_CRA", - + "NAL_UNIT_CODED_SLICE_GDR", "NAL_UNIT_RESERVED_IRAP_VCL11", "NAL_UNIT_RESERVED_IRAP_VCL12", - 
"NAL_UNIT_RESERVED_IRAP_VCL13", - - "NAL_UNIT_RESERVED_VCL14", - -#if HEVC_VPS + "NAL_UNIT_DPS", "NAL_UNIT_VPS", -#else - "NAL_UNIT_RESERVED_VCL15", -#endif - - "NAL_UNIT_RESERVED_NVCL16", - "NAL_UNIT_SPS", "NAL_UNIT_PPS", - "NAL_UNIT_APS", + "NAL_UNIT_PREFIX_APS", + "NAL_UNIT_SUFFIX_APS", + "NAL_UNIT_PH", "NAL_UNIT_ACCESS_UNIT_DELIMITER", "NAL_UNIT_EOS", "NAL_UNIT_EOB", "NAL_UNIT_PREFIX_SEI", "NAL_UNIT_SUFFIX_SEI", - "NAL_UNIT_FILLER_DATA", - + "NAL_UNIT_FD", "NAL_UNIT_RESERVED_NVCL26", "NAL_UNIT_RESERVED_NVCL27", "NAL_UNIT_UNSPECIFIED_28", "NAL_UNIT_UNSPECIFIED_29", "NAL_UNIT_UNSPECIFIED_30", "NAL_UNIT_UNSPECIFIED_31" -#endif }; int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type) @@ -217,15 +171,6 @@ int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type) { iPOCmsb = iPrevPOCmsb; } -#if !JVET_M0101_HLS - if ( nalu_type == NAL_UNIT_CODED_SLICE_BLA_W_LP - || nalu_type == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || nalu_type == NAL_UNIT_CODED_SLICE_BLA_N_LP ) - { - // For BLA picture types, POCmsb is set to 0. 
- iPOCmsb = 0; - } -#endif return iPOCmsb + iPOClsb; } @@ -245,6 +190,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int int bits_for_poc = 8; bool skip_next_sei = false; + bool first_slice_segment_in_pic_flag = false; while(find_nal_unit(p, sz, &nal_start, &nal_end) > 0) { @@ -260,50 +206,57 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int p += nal_start; std::vector<uint8_t> nalu(p, p + nal_end - nal_start); - int nalu_type = nalu[0] >> 1; + int nalu_type = nalu[1] >> 3; +#if ENABLE_TRACING + printf ("NALU Type: %d (%s)\n", nalu_type, NALU_TYPE[nalu_type]); +#endif int poc = -1; int poc_lsb = -1; int new_poc = -1; - + + HLSyntaxReader HLSReader; + static ParameterSetManager parameterSetManager; + ParcatHLSyntaxReader parcatHLSReader; + InputNALUnit inp_nalu; + std::vector<uint8_t> & nalu_bs = inp_nalu.getBitstream().getFifo(); + nalu_bs = nalu; + read(inp_nalu); + + if( inp_nalu.m_nalUnitType == NAL_UNIT_SPS ) + { + SPS* sps = new SPS(); + HLSReader.setBitstream( &inp_nalu.getBitstream() ); + HLSReader.parseSPS( sps ); + parameterSetManager.storeSPS( sps, inp_nalu.getBitstream().getFifo() ); + } + + if( inp_nalu.m_nalUnitType == NAL_UNIT_PPS ) + { + PPS* pps = new PPS(); + HLSReader.setBitstream( &inp_nalu.getBitstream() ); + HLSReader.parsePPS( pps, ¶meterSetManager ); + parameterSetManager.storePPS( pps, inp_nalu.getBitstream().getFifo() ); + } + if( inp_nalu.m_nalUnitType == NAL_UNIT_PH ) + { + first_slice_segment_in_pic_flag = true; + } + if(nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP) { poc = 0; new_poc = *poc_base + poc; + first_slice_segment_in_pic_flag = false; } - -#if !JVET_M0101_HLS - if(nalu_type < 32 && nalu_type != NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type != NAL_UNIT_CODED_SLICE_IDR_N_LP) -#else - if(nalu_type < 15 && nalu_type != NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type != NAL_UNIT_CODED_SLICE_IDR_N_LP) -#endif + 
if((nalu_type < NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu_type > NAL_UNIT_CODED_SLICE_IDR_N_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL_12) ) { - int offset = 16; + parcatHLSReader.setBitstream( &inp_nalu.getBitstream() ); + + // beginning of slice header parsing, taken from VLCReader + parcatHLSReader.parseSliceHeaderUpToPoc( ¶meterSetManager ); + int num_bits_up_to_poc_lsb = parcatHLSReader.getBitstream()->getNumBitsRead(); + int offset = num_bits_up_to_poc_lsb; - offset += 1; //first_slice_segment_in_pic_flag -#if !JVET_M0101_HLS - if (nalu_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL23) -#else - if (nalu_type >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL13) -#endif - { - offset += 1; //no_output_of_prior_pics_flag - } - - // determine offset for slice_pic_parameter_set_id TODO: ue(v) - int byte_offset2 = offset / 8; - int hi_bits2 = offset % 8; - uint16_t data2 = (nalu[byte_offset2] << 8) | nalu[byte_offset2 + 1]; - int low_bits2 = 16 - hi_bits2 - 1; - if(((data2 >> low_bits2) % 2)) - offset += 1; // PPSId=0 - else - offset += 3; // PPSId=1 - offset += 1; // slice_type TODO: ue(v) - // separate_colour_plane_flag is not supported in JEM1.0 - if (nalu_type == NAL_UNIT_CODED_SLICE_CRA) - { - offset += 2; - } int byte_offset = offset / 8; int hi_bits = offset % 8; uint16_t data = (nalu[byte_offset] << 8) | nalu[byte_offset + 1]; @@ -315,14 +268,21 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int // int picOrderCntLSB = (pcSlice->getPOC()-pcSlice->getLastIDR()+(1<<pcSlice->getSPS()->getBitsForPOC())) & ((1<<pcSlice->getSPS()->getBitsForPOC())-1); unsigned picOrderCntLSB = (new_poc - *last_idr_poc +(1 << bits_for_poc)) & ((1<<bits_for_poc)-1); - int low = data & ((1 << (low_bits + 1)) - 1); + int low = data & ((1 << low_bits) - 1); int hi = data >> (16 - hi_bits); data = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low; nalu[byte_offset] = data >> 8; 
nalu[byte_offset + 1] = data & 0xff; - ++cnt; + if( first_slice_segment_in_pic_flag ) + { +#if ENABLE_TRACING + std::cout << "Changed poc " << poc << " to " << new_poc << std::endl; +#endif + ++cnt; + first_slice_segment_in_pic_flag = false; + } } if(idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) @@ -331,11 +291,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int idr_found = true; } -#if HEVC_VPS - if((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP )) || ((idx>1 && !idr_found) && ( nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS)) -#else - if((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_APS)) -#endif + if( ( idx > 1 && ( nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP ) ) || ( ( idx > 1 && !idr_found ) && ( nalu_type == NAL_UNIT_DPS || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER ) ) || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei)) { } @@ -389,6 +345,12 @@ std::vector<uint8_t> process_segment(const char * path, int idx, int * poc_base, int main(int argc, char * argv[]) { +#if ENABLE_TRACING + std::string tracingFile; + std::string tracingRule; + + g_trace_ctx = tracing_init(tracingFile, tracingRule); +#endif if(argc < 3) { printf("parcat version VTM %s\n", VTM_VERSION); @@ -405,6 +367,8 @@ int main(int argc, char * argv[]) int poc_base = 0; int last_idr_poc = 0; + initROM(); + for(int i = 1; i < argc - 1; ++i) { std::vector<uint8_t> v = process_segment(argv[i], i, &poc_base, 
&last_idr_poc); @@ -413,4 +377,7 @@ int main(int argc, char * argv[]) } fclose(fdo); +#if ENABLE_TRACING + tracing_uninit(g_trace_ctx); +#endif } diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.cpp b/source/App/SEIRemovalApp/SEIRemovalApp.cpp index dd22baa5291c4d962591b8354e98a8e19be63843..e10c99da882d7df9985db68d04239c880f4d251a 100644 --- a/source/App/SEIRemovalApp/SEIRemovalApp.cpp +++ b/source/App/SEIRemovalApp/SEIRemovalApp.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -72,11 +72,11 @@ void read2(InputNALUnit& nalu) { InputBitstream& bs = nalu.getBitstream(); - bool forbidden_zero_bit = bs.read(1); // forbidden_zero_bit - if(forbidden_zero_bit != 0) { THROW( "Forbidden zero-bit not '0'" );} - nalu.m_nalUnitType = (NalUnitType) bs.read(6); // nal_unit_type - nalu.m_nuhLayerId = bs.read(6); // nuh_layer_id - nalu.m_temporalId = bs.read(3) - 1; // nuh_temporal_id_plus1 + nalu.m_forbiddenZeroBit = bs.read(1); // forbidden zero bit + nalu.m_nuhReservedZeroBit = bs.read(1); // nuh_reserved_zero_bit + nalu.m_nuhLayerId = bs.read(6); // nuh_layer_id + nalu.m_nalUnitType = (NalUnitType) bs.read(5); // nal_unit_type + nalu.m_temporalId = bs.read(3) - 1; // nuh_temporal_id_plus1 } uint32_t SEIRemovalApp::decode() @@ -129,7 +129,7 @@ uint32_t SEIRemovalApp::decode() // just kick out all suffix SEIS bWrite &= (( !m_discardSuffixSEIs || nalu.m_nalUnitType != NAL_UNIT_SUFFIX_SEI ) && ( !m_discardPrefixSEIs || nalu.m_nalUnitType != NAL_UNIT_PREFIX_SEI )); bWrite &= unitCnt >= m_numNALUnitsToSkip; - bWrite &= m_numNALUnitsToWrite > 0 && unitCnt <= m_numNALUnitsToWrite; + bWrite &= m_numNALUnitsToWrite < 0 || unitCnt <= m_numNALUnitsToWrite; if( bWrite ) { diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.h 
b/source/App/SEIRemovalApp/SEIRemovalApp.h index ec585ea5610cac993616f1836d446eeaace6bba9..32bb23ac26d974025866f8df9eb28d359d40e65c 100644 --- a/source/App/SEIRemovalApp/SEIRemovalApp.h +++ b/source/App/SEIRemovalApp/SEIRemovalApp.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp index 8f03fa9df5a2881a42cc36528b81e01c613c494f..f4ab6e1f0c46ff63065465c81edacb81d144bf38 100644 --- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp +++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h index 7daa16afe5161bc87e79dc4bb5aec16f1993dc10..593ba838316762c62ac1bae26d23d0e6cff21d12 100644 --- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h +++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/App/SEIRemovalApp/seiremovalmain.cpp b/source/App/SEIRemovalApp/seiremovalmain.cpp index 572ed84fa8bc0709f8a61d1f060579410d1d636f..5fba16f98c0820dc7d1273f8adcff2ddbe9b9432 100644 --- a/source/App/SEIRemovalApp/seiremovalmain.cpp +++ b/source/App/SEIRemovalApp/seiremovalmain.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/StreamMergeApp/CMakeLists.txt b/source/App/StreamMergeApp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..77c53ece6b5e192580b86a7f3b7dac9e4c0b6290 --- /dev/null +++ b/source/App/StreamMergeApp/CMakeLists.txt @@ -0,0 +1,84 @@ +# executable +set( EXE_NAME StreamMergeApp ) + +# get source files +file( GLOB SRC_FILES "*.cpp" ) + +# get include files +file( GLOB INC_FILES "*.h" ) + +# get additional libs for gcc on Ubuntu systems +if( CMAKE_SYSTEM_NAME STREQUAL "Linux" ) + if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) + if( USE_ADDRESS_SANITIZER ) + set( ADDITIONAL_LIBS asan ) + endif() + endif() +endif() + +# NATVIS files for Visual Studio +if( MSVC ) + file( GLOB NATVIS_FILES "../../VisualStudio/*.natvis" ) +endif() + +# add executable +add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +if( SET_ENABLE_TRACING ) + if( ENABLE_TRACING ) + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_TRACING=1 ) + else() + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_TRACING=0 ) + endif() +endif() + +if( OpenMP_FOUND ) + if( SET_ENABLE_SPLIT_PARALLELISM ) + if( ENABLE_SPLIT_PARALLELISM ) + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 ) + else() + 
target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) + endif() + endif() + if( SET_ENABLE_WPP_PARALLELISM ) + if( ENABLE_WPP_PARALLELISM ) + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 ) + else() + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) + endif() + endif() +else() + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 ) + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 ) +endif() + +if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC ) + set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ ) + target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 ) +endif() + +target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} ) + +# lldb custom data formatters +if( XCODE ) + add_dependencies( ${EXE_NAME} Install${PROJECT_NAME}LldbFiles ) +endif() + +if( CMAKE_SYSTEM_NAME STREQUAL "Linux" ) + add_custom_command( TARGET ${EXE_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy + $<$<CONFIG:Debug>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG}/StreamMergeApp> + $<$<CONFIG:Release>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE}/StreamMergeApp> + $<$<CONFIG:RelWithDebInfo>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO}/StreamMergeApp> + $<$<CONFIG:MinSizeRel>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL}/StreamMergeApp> + $<$<CONFIG:Debug>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticd> + $<$<CONFIG:Release>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStatic> + $<$<CONFIG:RelWithDebInfo>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticp> + $<$<CONFIG:MinSizeRel>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticm> ) +endif() + +# example: place header files in different folders +source_group( "Natvis Files" FILES ${NATVIS_FILES} ) + +# set the folder where to place the projects +set_target_properties( ${EXE_NAME} PROPERTIES FOLDER app LINKER_LANGUAGE CXX ) diff --git 
a/source/App/StreamMergeApp/StreamMergeApp.cpp b/source/App/StreamMergeApp/StreamMergeApp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b9943e8ec3ee89f0de0ca8856ec57e93c3ca0b18 --- /dev/null +++ b/source/App/StreamMergeApp/StreamMergeApp.cpp @@ -0,0 +1,345 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + + /** \file StreamMergeApp.cpp + \brief Decoder application class + */ + +#include <list> +#include <vector> +#include <stdio.h> +#include <fcntl.h> + +#include "StreamMergeApp.h" +#include "DecoderLib/AnnexBread.h" +#include "DecoderLib/NALread.h" +#if RExt__DECODER_DEBUG_BIT_STATISTICS +#include "CommonLib/CodingStatistics.h" +#endif + + //! \ingroup DecoderApp + //! \{ + + // ==================================================================================================================== + // Constructor / destructor / initialization / destroy + // ==================================================================================================================== + +StreamMergeApp::StreamMergeApp() +{ + +} + +// ==================================================================================================================== +// Public member functions +// ==================================================================================================================== + +/** + - create internal class + - initialize internal class + - until the end of the bitstream, call decoding function in StreamMergeApp class + - delete allocated buffers + - destroy internal class + - returns the number of mismatching pictures + */ + +void read2(InputNALUnit& nalu) +{ + InputBitstream& bs = nalu.getBitstream(); + + nalu.m_forbiddenZeroBit = bs.read(1); // forbidden zero bit + nalu.m_nuhReservedZeroBit = bs.read(1); // 
nuh_reserved_zero_bit + nalu.m_nuhLayerId = bs.read(6); // nuh_layer_id + nalu.m_nalUnitType = (NalUnitType) bs.read(5); // nal_unit_type + nalu.m_temporalId = bs.read(3) - 1; // nuh_temporal_id_plus1 +} + +static void +_byteStreamNALUnit( + SingleLayerStream& bs, + std::istream& istream, + vector<uint8_t>& nalUnit, + AnnexBStats& stats) +{ + /* At the beginning of the decoding process, the decoder initialises its + * current position in the byte stream to the beginning of the byte stream. + * It then extracts and discards each leading_zero_8bits syntax element (if + * present), moving the current position in the byte stream forward one + * byte at a time, until the current position in the byte stream is such + * that the next four bytes in the bitstream form the four-byte sequence + * 0x00000001. + */ +#if RExt__DECODER_DEBUG_BIT_STATISTICS + CodingStatistics::SStat &statBits = CodingStatistics::GetStatisticEP(STATS__NAL_UNIT_PACKING); +#endif + while ((bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) != 0x000001) + && (bs.eofBeforeNBytes(32 / 8, istream) || bs.peekBytes(32 / 8, istream) != 0x00000001)) + { + uint8_t leading_zero_8bits = bs.readByte(istream); +#if RExt__DECODER_DEBUG_BIT_STATISTICS + statBits.bits += 8; statBits.count++; +#endif + if (leading_zero_8bits != 0) { THROW("Leading zero bits not zero"); } + stats.m_numLeadingZero8BitsBytes++; + } + + /* 1. When the next four bytes in the bitstream form the four-byte sequence + * 0x00000001, the next byte in the byte stream (which is a zero_byte + * syntax element) is extracted and discarded and the current position in + * the byte stream is set equal to the position of the byte following this + * discarded byte. 
+ */ + /* NB, the previous step guarantees this will succeed -- if EOF was + * encountered, an exception will stop execution getting this far */ + if (bs.peekBytes(24 / 8, istream) != 0x000001) + { + uint8_t zero_byte = bs.readByte(istream); +#if RExt__DECODER_DEBUG_BIT_STATISTICS + statBits.bits += 8; statBits.count++; +#endif + CHECK(zero_byte != 0, "Zero byte not '0'"); + stats.m_numZeroByteBytes++; + } + + /* 2. The next three-byte sequence in the byte stream (which is a + * start_code_prefix_one_3bytes) is extracted and discarded and the current + * position in the byte stream is set equal to the position of the byte + * following this three-byte sequence. + */ + /* NB, (1) guarantees that the next three bytes are 0x00 00 01 */ + uint32_t start_code_prefix_one_3bytes = bs.readBytes(24 / 8, istream); +#if RExt__DECODER_DEBUG_BIT_STATISTICS + statBits.bits += 24; statBits.count += 3; +#endif + if (start_code_prefix_one_3bytes != 0x000001) { THROW("Invalid code prefix"); } + stats.m_numStartCodePrefixBytes += 3; + + /* 3. NumBytesInNALunit is set equal to the number of bytes starting with + * the byte at the current position in the byte stream up to and including + * the last byte that precedes the location of any of the following + * conditions: + * a. A subsequent byte-aligned three-byte sequence equal to 0x000000, or + * b. A subsequent byte-aligned three-byte sequence equal to 0x000001, or + * c. The end of the byte stream, as determined by unspecified means. + */ + /* 4. NumBytesInNALunit bytes are removed from the bitstream and the + * current position in the byte stream is advanced by NumBytesInNALunit + * bytes. 
This sequence of bytes is nal_unit( NumBytesInNALunit ) and is + * decoded using the NAL unit decoding process + */ + /* NB, (unsigned)x > 2 implies n!=0 && n!=1 */ +#if RExt__DECODER_DEBUG_BIT_STATISTICS + CodingStatistics::SStat &bodyStats = CodingStatistics::GetStatisticEP(STATS__NAL_UNIT_TOTAL_BODY); +#endif + while (bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) > 2) + { +#if RExt__DECODER_DEBUG_BIT_STATISTICS + uint8_t thebyte = bs.readByte(istream); bodyStats.bits += 8; bodyStats.count++; + nalUnit.push_back(thebyte); +#else + nalUnit.push_back(bs.readByte(istream)); +#endif + } + + /* 5. When the current position in the byte stream is: + * - not at the end of the byte stream (as determined by unspecified means) + * - and the next bytes in the byte stream do not start with a three-byte + * sequence equal to 0x000001 + * - and the next bytes in the byte stream do not start with a four byte + * sequence equal to 0x00000001, + * the decoder extracts and discards each trailing_zero_8bits syntax + * element, moving the current position in the byte stream forward one byte + * at a time, until the current position in the byte stream is such that: + * - the next bytes in the byte stream form the four-byte sequence + * 0x00000001 or + * - the end of the byte stream has been encountered (as determined by + * unspecified means). 
+ */ + /* NB, (3) guarantees there are at least three bytes available or none */ + while ((bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) != 0x000001) + && (bs.eofBeforeNBytes(32 / 8, istream) || bs.peekBytes(32 / 8, istream) != 0x00000001)) + { + uint8_t trailing_zero_8bits = bs.readByte(istream); +#if RExt__DECODER_DEBUG_BIT_STATISTICS + statBits.bits += 8; statBits.count++; +#endif + CHECK(trailing_zero_8bits != 0, "Trailing zero bits not '0'"); + stats.m_numTrailingZero8BitsBytes++; + } +} + +/** + * Parse an AVC AnnexB Bytestream bs to extract a single nalUnit + * while accumulating bytestream statistics into stats. + * + * Returns false if EOF was reached (NB, nalunit data may be valid), + * otherwise true. + */ +bool +byteStreamNALUnit( + SingleLayerStream& bs, + std::istream& istream, + vector<uint8_t>& nalUnit, + AnnexBStats& stats) +{ + bool eof = false; + try + { + _byteStreamNALUnit(bs, istream, nalUnit, stats); + } + catch (...) + { + eof = true; + } + stats.m_numBytesInNALUnit = uint32_t(nalUnit.size()); + return eof; +} + +void StreamMergeApp::writeNewVPS(ostream& out, int nLayerId, int nTemporalId) +{ + //write NALU header + OutputBitstream bsNALUHeader; + static const uint8_t start_code_prefix[] = { 0,0,0,1 }; + + int forbiddenZero = 0; + bsNALUHeader.write(forbiddenZero, 1); // forbidden_zero_bit + int nuhReservedZeroBit = 0; + bsNALUHeader.write(nuhReservedZeroBit, 1); // nuh_reserved_zero_bit + bsNALUHeader.write(nLayerId, 6); // nuh_layer_id + bsNALUHeader.write(NAL_UNIT_VPS, 5); // nal_unit_type + bsNALUHeader.write(nTemporalId + 1, 3); // nuh_temporal_id_plus1 + + out.write(reinterpret_cast<const char*>(start_code_prefix), 4); + out.write(reinterpret_cast<const char*>(bsNALUHeader.getByteStream()), bsNALUHeader.getByteStreamLength()); + + //write VPS + OutputBitstream bsVPS; + HLSWriter m_HLSWriter; + + m_HLSWriter.setBitstream(&bsVPS); + m_HLSWriter.codeVPS(&vps); + + out.write(reinterpret_cast<const 
char*>(bsVPS.getByteStream()), bsVPS.getByteStreamLength()); + + return; +} + +uint32_t StreamMergeApp::mergeStreams() +{ + ifstream bitstreamFileIn[MAX_VPS_LAYERS]; + ofstream bitstreamFileOut(m_bitstreamFileNameOut.c_str(), ifstream::out | ifstream::binary); + int nNumValidStr = m_numInputStreams; + + for (int i = 0; i < m_numInputStreams; i++) + { + bitstreamFileIn[i].open(m_bitstreamFileNameIn[i].c_str(), ifstream::in | ifstream::binary); + + if (!bitstreamFileIn[i]) + { + EXIT("failed to open bitstream file " << m_bitstreamFileNameIn[i].c_str() << " for reading"); + } + + bitstreamFileIn[i].clear(); + bitstreamFileIn[i].seekg(0, ios::beg); + } + + SingleLayerStream bytestream[MAX_VPS_LAYERS]; + + for (int i = 0; i < m_numInputStreams; i++) + bytestream[i].init(bitstreamFileIn[i]); + + //set VPS which will be replicated for all layers but with differnt nul_layer_id + vps.setMaxLayers(m_numInputStreams); + vps.setVPSExtensionFlag(false); + + //Loop all input bitstreams to interleave their NALUs + while (nNumValidStr) + { + //loop over all input streams + for (int i = 0; i < m_numInputStreams; i++) + { + uint8_t layerId = i < 63 ? 
i : i + 1; + + if (!bitstreamFileIn[i]) + continue; + + AnnexBStats stats = AnnexBStats(); + + InputNALUnit nalu; + + byteStreamNALUnit(bytestream[i], bitstreamFileIn[i], nalu.getBitstream().getFifo(), stats); + + // call actual decoding function + if (nalu.getBitstream().getFifo().empty()) + { + /* this can happen if the following occur: + * - empty input file + * - two back-to-back start_code_prefixes + * - start_code_prefix immediately followed by EOF + */ + std::cerr << "Warning: Attempt to decode an empty NAL unit" << std::endl; + } + else + { + read2(nalu); + + if (nalu.m_nalUnitType == NAL_UNIT_VPS) + { + writeNewVPS(bitstreamFileOut, layerId, nalu.m_temporalId); + printf("Write new VPS for stream %d\n", i); + + continue; + } + + int iNumZeros = stats.m_numLeadingZero8BitsBytes + stats.m_numZeroByteBytes + stats.m_numStartCodePrefixBytes - 1; + char ch = 0; + for (int i = 0; i < iNumZeros; i++) { bitstreamFileOut.write(&ch, 1); } + ch = 1; bitstreamFileOut.write(&ch, 1); + + //update the nul_layer_id + uint8_t *p = (uint8_t*)nalu.getBitstream().getFifo().data(); + p[1] = ((layerId + 1) << 1) & 0xff; + + bitstreamFileOut.write((const char*)p, nalu.getBitstream().getFifo().size()); + + printf("Merge NALU type %d from stream %d\n", nalu.m_nalUnitType, i); + } + + if (!bitstreamFileIn[i]) + nNumValidStr--; + } + } + + return 0; +} + +//! \} diff --git a/source/App/StreamMergeApp/StreamMergeApp.h b/source/App/StreamMergeApp/StreamMergeApp.h new file mode 100644 index 0000000000000000000000000000000000000000..b4dc15ae8673800ec65c470ed6a948e56ad0fd84 --- /dev/null +++ b/source/App/StreamMergeApp/StreamMergeApp.h @@ -0,0 +1,196 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file SEIRemovalApp.h + \brief Decoder application class (header) +*/ + +#ifndef __STREAMMERGEAPP__ +#define __STREAMMERGEAPP__ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include <stdio.h> +#include <fstream> +#include <iostream> +#include "CommonLib/CommonDef.h" +#include "VLCWriter.h" +#include "CABACWriter.h" +#include "AnnexBread.h" +#include "StreamMergeAppCfg.h" + +using namespace std; + +// ==================================================================================================================== +// Class definition +// ==================================================================================================================== + +/// decoder application class +class StreamMergeApp : public StreamMergeAppCfg +{ + +public: + StreamMergeApp(); + virtual ~StreamMergeApp () {} + + VPS vps; + + uint32_t mergeStreams (); ///< main stream merging function + void writeNewVPS (ostream& out, int nNumLayers, int nTemporalId); +}; + + +class SingleLayerStream +{ +public: + /** + * Create a bytestream reader that will extract bytes from + * istream. + * + * NB, it isn't safe to access istream while in use by a + * InputByteStream. + * + * Side-effects: the exception mask of istream is set to eofbit + */ + SingleLayerStream() + : m_numFutureBytes(0) + , m_futureBytes(0) + { + } + + /** + * Reset the internal state. Must be called if input stream is + * modified externally to this class + */ + void reset() + { + m_numFutureBytes = 0; + m_futureBytes = 0; + } + + void init(std::istream& istream) + { + istream.exceptions(std::istream::eofbit | std::istream::badbit); + } + + /** + * returns true if an EOF will be encountered within the next + * n bytes. 
+ */ + bool eofBeforeNBytes(uint32_t n, std::istream& m_Input) + { + CHECK(n > 4, "Unsupported look-ahead value"); + if (m_numFutureBytes >= n) + { + return false; + } + + n -= m_numFutureBytes; + try + { + for (uint32_t i = 0; i < n; i++) + { + m_futureBytes = (m_futureBytes << 8) | m_Input.get(); + m_numFutureBytes++; + } + } + catch (...) + { + return true; + } + return false; + } + + /** + * return the next n bytes in the stream without advancing + * the stream pointer. + * + * Returns: an unsigned integer representing an n byte bigendian + * word. + * + * If an attempt is made to read past EOF, an n-byte word is + * returned, but the portion that required input bytes beyond EOF + * is undefined. + * + */ + uint32_t peekBytes(uint32_t n, std::istream& m_Input) + { + eofBeforeNBytes(n, m_Input); + return m_futureBytes >> 8 * (m_numFutureBytes - n); + } + + /** + * consume and return one byte from the input. + * + * If bytestream is already at EOF prior to a call to readByte(), + * an exception std::ios_base::failure is thrown. + */ + uint8_t readByte(std::istream& m_Input) + { + if (!m_numFutureBytes) + { + uint8_t byte = m_Input.get(); + return byte; + } + m_numFutureBytes--; + uint8_t wanted_byte = m_futureBytes >> 8 * m_numFutureBytes; + m_futureBytes &= ~(0xff << 8 * m_numFutureBytes); + return wanted_byte; + } + + /** + * consume and return n bytes from the input. n bytes from + * bytestream are interpreted as bigendian when assembling + * the return value. 
+ */ + uint32_t readBytes(uint32_t n, std::istream& m_Input) + { + uint32_t val = 0; + for (uint32_t i = 0; i < n; i++) + { + val = (val << 8) | readByte(m_Input); + } + return val; + } + +private: + uint32_t m_numFutureBytes; /* number of valid bytes in m_futureBytes */ + uint32_t m_futureBytes; /* bytes that have been peeked */ +}; + +bool byteStreamNALUnit(SingleLayerStream& bs, std::istream& istream, vector<uint8_t>& nalUnit, AnnexBStats& stats); + +#endif // __STREAMMERGEAPP__ + diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.cpp b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88432c360fd9a5e9304961c2872fae8a841119f3 --- /dev/null +++ b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp @@ -0,0 +1,86 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + + /** \file StreamMergeAppCfg.cpp + \brief Decoder configuration class + */ + +#include <cstdio> +#include <cstring> +#include <string> +#include "StreamMergeAppCfg.h" +#include "Utilities/program_options_lite.h" + +using namespace std; +namespace po = df::program_options_lite; + +//! \ingroup DecoderApp +//! \{ + +// ==================================================================================================================== +// Public member functions +// ==================================================================================================================== + +/** \param argc number of arguments + \param argv array of arguments + */ +bool StreamMergeAppCfg::parseCfg(int argc, char* argv[]) +{ + int i; + + m_numInputStreams = argc - 2; + + for (i = 0; i < m_numInputStreams; i++) + { + m_bitstreamFileNameIn[i] = argv[i + 1]; + } + + m_bitstreamFileNameOut = argv[i + 1]; + + return true; +} + +StreamMergeAppCfg::StreamMergeAppCfg() + : m_bitstreamFileNameOut() + , m_numInputStreams(0) +{ + for (int i = 0; i < MAX_VPS_LAYERS; i++) + m_bitstreamFileNameIn[i] = ""; +} + +StreamMergeAppCfg::~StreamMergeAppCfg() +{ + +} + +//! 
\} diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.h b/source/App/StreamMergeApp/StreamMergeAppCfg.h new file mode 100644 index 0000000000000000000000000000000000000000..6ef3e791ce8f0fbe708759cfd5026f99c5bf4c34 --- /dev/null +++ b/source/App/StreamMergeApp/StreamMergeAppCfg.h @@ -0,0 +1,74 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file StreamMergeAppCfg.h + \brief Stream merge app configuration class (header) +*/ + +#ifndef __STREAMMERGEAPPCFG__ +#define __STREAMMERGEAPPCFG__ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include "CommonLib/CommonDef.h" +#include <vector> + +//! \ingroup DecoderApp +//! \{ + +// ==================================================================================================================== +// Class definition +// ==================================================================================================================== + +/// Decoder configuration class +class StreamMergeAppCfg +{ +protected: + std::string m_bitstreamFileNameIn[MAX_VPS_LAYERS]; ///< output bitstream file name + std::string m_bitstreamFileNameOut; ///< input bitstream file name + int m_numInputStreams; ///< number of input bitstreams + +public: + StreamMergeAppCfg(); + virtual ~StreamMergeAppCfg(); + + bool parseCfg ( int argc, char* argv[] ); ///< initialize option class from configuration +}; + +//! 
\} + +#endif // __STREAMMERGEAPPCFG__ + + diff --git a/source/App/StreamMergeApp/StreamMergeMain.cpp b/source/App/StreamMergeApp/StreamMergeMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7404d8751ae8f398ca4559d9afab2eccca11fe31 --- /dev/null +++ b/source/App/StreamMergeApp/StreamMergeMain.cpp @@ -0,0 +1,94 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + + /** \file StreamMergeMain.cpp + \brief Stream merge application main + */ + +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include "StreamMergeApp.h" +#include "program_options_lite.h" + + //! \ingroup DecoderApp + //! \{ + + // ==================================================================================================================== + // Main function + // ==================================================================================================================== + +int main(int argc, char* argv[]) +{ + int returnCode = EXIT_SUCCESS; + + if (argc < 4) + { + printf("usage: %s <bitstream1> <bitstream2> [<bitstream3> ...] <outfile>\n", argv[0]); + return -1; + } + + // print information + fprintf(stdout, "\n"); + fprintf(stdout, "VVCSoftware: VTM Version %s ", VTM_VERSION); + fprintf(stdout, "\n"); + + StreamMergeApp *pStrMergeApp = new StreamMergeApp; + // parse configuration + if (!pStrMergeApp->parseCfg(argc, argv)) + { + returnCode = EXIT_FAILURE; + return returnCode; + } + + // starting time + double dResult; + clock_t lBefore = clock(); + + // call decoding function + if (0 != pStrMergeApp->mergeStreams()) + { + printf("\n\n***ERROR*** A merge error happened\n"); + returnCode = EXIT_FAILURE; + } + + // ending time + dResult = (double)(clock() - lBefore) / CLOCKS_PER_SEC; + printf("\n Total Time: %12.3f sec.\n", dResult); + + delete pStrMergeApp; + + return returnCode; +} + +//! 
\} diff --git a/source/App/utils/BitrateTargeting/ExtractBitrates.cpp b/source/App/utils/BitrateTargeting/ExtractBitrates.cpp index c13fa5e73218919c77157b732f94b38232528d93..1740a22d0c356fabf455da6dee91a7a5a9264a3f 100644 --- a/source/App/utils/BitrateTargeting/ExtractBitrates.cpp +++ b/source/App/utils/BitrateTargeting/ExtractBitrates.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/ExtractBitrates.h b/source/App/utils/BitrateTargeting/ExtractBitrates.h index dd73ffe9588fec1540ba4412a0c23a437c22075f..a3732fc9f03414445d06827246becb1c9a3e3703 100644 --- a/source/App/utils/BitrateTargeting/ExtractBitrates.h +++ b/source/App/utils/BitrateTargeting/ExtractBitrates.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp b/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp index 562096a895cd60af1656407cbbcc47f8b7087e15..f44ac2d7b932f74b6d43dffb4296cdf51886c024 100644 --- a/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp +++ b/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp index 2e591804d47e86369e2ebec446d0513141ab6b61..039a1ff98f3fe3608c658e954e791d46fded04d7 100644 --- a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp +++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h index 151e9b830a16e3b0022c9f1b95e487c71411e5de..16e4a6a036562ec77050d6141a03bd09a7eaebfe 100644 --- a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h +++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp b/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp index cddb75b46f3f72f68f540bfa995267f4965729c8..c4ad9388ac896836b07d32289d6efb32d6e30846 100644 --- a/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp +++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/BitrateTargeting/RuntimeError.h b/source/App/utils/BitrateTargeting/RuntimeError.h index e777654e87cbbbe08921ed2b601337f1642a6e35..01f720a9f85aa1052fc5d07e56d6101887c0423c 100644 --- a/source/App/utils/BitrateTargeting/RuntimeError.h +++ b/source/App/utils/BitrateTargeting/RuntimeError.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/annexBbytecount.cpp b/source/App/utils/annexBbytecount.cpp index 2f6c1095943c6b34a3aa09e13e500b1ba8c27705..593a9a12d96e05064d310071699848286308866a 100644 --- a/source/App/utils/annexBbytecount.cpp +++ b/source/App/utils/annexBbytecount.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/App/utils/convert_NtoMbit_YCbCr.cpp b/source/App/utils/convert_NtoMbit_YCbCr.cpp index 843b1ecc8b0ebfca445e33b601655abd711b0da0..79663cfc4da71080afe9777a3a46e6b09708a2f6 100644 --- a/source/App/utils/convert_NtoMbit_YCbCr.cpp +++ b/source/App/utils/convert_NtoMbit_YCbCr.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt index 0fd16c8367e1c2756ebb970de74309327d285668..7f5f76558de44e7170c31fe438e444568bd6b87c 100644 --- a/source/Lib/CommonAnalyserLib/CMakeLists.txt +++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt @@ -46,12 +46,17 @@ set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} ) # library add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) +target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTICS=1 ) target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 ) if( EXTENSION_360_VIDEO ) target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 ) endif() +if( EXTENSION_HDRTOOLS ) + target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 ) +endif() + if( SET_ENABLE_TRACING ) if( ENABLE_TRACING ) target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 ) diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp index b1aef8429b7e98beaf45a5abfacc7aa74a85e8bb..60beb504f1719e3b7dbfa659184e455782bb83f8 100644 --- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp +++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -39,18 +39,27 @@ #include "CodingStructure.h" #include "Picture.h" +#include <array> +#include <cmath> + +constexpr int AdaptiveLoopFilter::AlfNumClippingValues[]; AdaptiveLoopFilter::AdaptiveLoopFilter() : m_classifier( nullptr ) { - for( int i = 0; i < NUM_DIRECTIONS; i++ ) + for (size_t i = 0; i < NUM_DIRECTIONS; i++) { - m_laplacian[i] = nullptr; + m_laplacian[i] = m_laplacianPtr[i]; + for (size_t j = 0; j < sizeof(m_laplacianPtr[i]) / sizeof(m_laplacianPtr[i][0]); j++) + { + m_laplacianPtr[i][j] = m_laplacianData[i][j]; + } } for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { m_ctuEnableFlag[compIdx] = nullptr; + m_ctuAlternative[compIdx] = nullptr; } m_deriveClassificationBlk = deriveClassificationBlk; @@ -64,15 +73,220 @@ AdaptiveLoopFilter::AdaptiveLoopFilter() #endif } -void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSliceParam ) +bool AdaptiveLoopFilter::isCrossedByVirtualBoundaries( const CodingStructure& cs, const int xPos, const int yPos, const int width, const int height, bool& clipTop, bool& clipBottom, bool& clipLeft, bool& clipRight, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], int& rasterSliceAlfPad ) { - if( !alfSliceParam.enabledFlag[COMPONENT_Y] && !alfSliceParam.enabledFlag[COMPONENT_Cb] && !alfSliceParam.enabledFlag[COMPONENT_Cr] ) + clipTop = false; clipBottom = false; clipLeft = false; clipRight = false; + numHorVirBndry = 0; numVerVirBndry = 0; + const PPS* pps = cs.pps; + const PicHeader* picHeader = cs.picHeader; + + if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { - return; + for( int i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ ) + { + if( picHeader->getVirtualBoundariesPosY(i) == yPos ) + { + clipTop = true; + } + else if( picHeader->getVirtualBoundariesPosY(i) == yPos + height ) + { + clipBottom = true; + } + else if( yPos < 
picHeader->getVirtualBoundariesPosY(i) && picHeader->getVirtualBoundariesPosY(i) < yPos + height ) + { + horVirBndryPos[numHorVirBndry++] = picHeader->getVirtualBoundariesPosY(i); + } + } + for( int i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ ) + { + if( picHeader->getVirtualBoundariesPosX(i) == xPos ) + { + clipLeft = true; + } + else if( picHeader->getVirtualBoundariesPosX(i) == xPos + width ) + { + clipRight = true; + } + else if( xPos < picHeader->getVirtualBoundariesPosX(i) && picHeader->getVirtualBoundariesPosX(i) < xPos + width ) + { + verVirBndryPos[numVerVirBndry++] = picHeader->getVirtualBoundariesPosX(i); + } + } } - // set available filter shapes - alfSliceParam.filterShapes = m_filterShapes; + const Slice& slice = *(cs.slice); + int ctuSize = slice.getSPS()->getCTUSize(); + const Position currCtuPos(xPos, yPos); + const CodingUnit *currCtu = cs.getCU(currCtuPos, CHANNEL_TYPE_LUMA); + //top + if (yPos >= ctuSize && clipTop == false) + { + const Position prevCtuPos(xPos, yPos - ctuSize); + const CodingUnit *prevCtu = cs.getCU(prevCtuPos, CHANNEL_TYPE_LUMA); + if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *prevCtu)) || + (!pps->getLoopFilterAcrossTilesEnabledFlag() && !CU::isSameTile(*currCtu, *prevCtu))) + { + clipTop = true; + } + } + + //bottom + if (yPos + ctuSize < cs.pcv->lumaHeight && clipBottom == false) + { + const Position nextCtuPos(xPos, yPos + ctuSize); + const CodingUnit *nextCtu = cs.getCU(nextCtuPos, CHANNEL_TYPE_LUMA); + if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *nextCtu)) || + (!pps->getLoopFilterAcrossTilesEnabledFlag() && !CU::isSameTile(*currCtu, *nextCtu))) + { + clipBottom = true; + } + } + + //left + if (xPos >= ctuSize && clipLeft == false) + { + const Position prevCtuPos(xPos - ctuSize, yPos); + const CodingUnit *prevCtu = cs.getCU(prevCtuPos, CHANNEL_TYPE_LUMA); + if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, 
*prevCtu)) || + (!pps->getLoopFilterAcrossTilesEnabledFlag() && !CU::isSameTile(*currCtu, *prevCtu))) + { + clipLeft = true; + } + } + + //right + if (xPos + ctuSize < cs.pcv->lumaWidth && clipRight == false) + { + const Position nextCtuPos(xPos + ctuSize, yPos); + const CodingUnit *nextCtu = cs.getCU(nextCtuPos, CHANNEL_TYPE_LUMA); + if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *nextCtu)) || + (!pps->getLoopFilterAcrossTilesEnabledFlag() && !CU::isSameTile(*currCtu, *nextCtu))) + { + clipRight = true; + } + } + + rasterSliceAlfPad = 0; + if ( !clipTop && !clipLeft ) + { + //top-left CTU + if ( xPos >= ctuSize && yPos >= ctuSize ) + { + const Position prevCtuPos( xPos - ctuSize, yPos - ctuSize ); + const CodingUnit *prevCtu = cs.getCU( prevCtuPos, CHANNEL_TYPE_LUMA ); + if ( !pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice( *currCtu, *prevCtu ) ) + { + rasterSliceAlfPad = 1; + } + } + } + + if ( !clipBottom && !clipRight ) + { + //bottom-right CTU + if ( xPos + ctuSize < cs.pcv->lumaWidth && yPos + ctuSize < cs.pcv->lumaHeight ) + { + const Position nextCtuPos( xPos + ctuSize, yPos + ctuSize ); + const CodingUnit *nextCtu = cs.getCU( nextCtuPos, CHANNEL_TYPE_LUMA ); + if ( !pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice( *currCtu, *nextCtu ) ) + { + rasterSliceAlfPad += 2; + } + } + } + + return numHorVirBndry > 0 || numVerVirBndry > 0 || clipTop || clipBottom || clipLeft || clipRight || rasterSliceAlfPad; +} + +const int AdaptiveLoopFilter::m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF] = +{ + { 0, 0, 2, -3, 1, -4, 1, 7, -1, 1, -1, 5, 0 }, + { 0, 0, 0, 0, 0, -1, 0, 1, 0, 0, -1, 2, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0 }, + { 2, 2, -7, -3, 0, -5, 13, 22, 12, -3, -3, 17, 0 }, + { -1, 0, 6, -8, 1, -5, 1, 23, 0, 2, -5, 10, 0 }, + { 0, 0, -1, -1, 0, -1, 2, 1, 0, 0, -1, 4, 0 }, + { 0, 0, 3, -11, 1, 0, -1, 35, 5, 2, -9, 9, 0 }, + 
{ 0, 0, 8, -8, -2, -7, 4, 4, 2, 1, -1, 25, 0 }, + { 0, 0, 1, -1, 0, -3, 1, 3, -1, 1, -1, 3, 0 }, + { 0, 0, 3, -3, 0, -6, 5, -1, 2, 1, -4, 21, 0 }, + { -7, 1, 5, 4, -3, 5, 11, 13, 12, -8, 11, 12, 0 }, + { -5, -3, 6, -2, -3, 8, 14, 15, 2, -7, 11, 16, 0 }, + { 2, -1, -6, -5, -2, -2, 20, 14, -4, 0, -3, 25, 0 }, + { 3, 1, -8, -4, 0, -8, 22, 5, -3, 2, -10, 29, 0 }, + { 2, 1, -7, -1, 2, -11, 23, -5, 0, 2, -10, 29, 0 }, + { -6, -3, 8, 9, -4, 8, 9, 7, 14, -2, 8, 9, 0 }, + { 2, 1, -4, -7, 0, -8, 17, 22, 1, -1, -4, 23, 0 }, + { 3, 0, -5, -7, 0, -7, 15, 18, -5, 0, -5, 27, 0 }, + { 2, 0, 0, -7, 1, -10, 13, 13, -4, 2, -7, 24, 0 }, + { 3, 3, -13, 4, -2, -5, 9, 21, 25, -2, -3, 12, 0 }, + { -5, -2, 7, -3, -7, 9, 8, 9, 16, -2, 15, 12, 0 }, + { 0, -1, 0, -7, -5, 4, 11, 11, 8, -6, 12, 21, 0 }, + { 3, -2, -3, -8, -4, -1, 16, 15, -2, -3, 3, 26, 0 }, + { 2, 1, -5, -4, -1, -8, 16, 4, -2, 1, -7, 33, 0 }, + { 2, 1, -4, -2, 1, -10, 17, -2, 0, 2, -11, 33, 0 }, + { 1, -2, 7, -15, -16, 10, 8, 8, 20, 11, 14, 11, 0 }, + { 2, 2, 3, -13, -13, 4, 8, 12, 2, -3, 16, 24, 0 }, + { 1, 4, 0, -7, -8, -4, 9, 9, -2, -2, 8, 29, 0 }, + { 1, 1, 2, -4, -1, -6, 6, 3, -1, -1, -3, 30, 0 }, + { -7, 3, 2, 10, -2, 3, 7, 11, 19, -7, 8, 10, 0 }, + { 0, -2, -5, -3, -2, 4, 20, 15, -1, -3, -1, 22, 0 }, + { 3, -1, -8, -4, -1, -4, 22, 8, -4, 2, -8, 28, 0 }, + { 0, 3, -14, 3, 0, 1, 19, 17, 8, -3, -7, 20, 0 }, + { 0, 2, -1, -8, 3, -6, 5, 21, 1, 1, -9, 13, 0 }, + { -4, -2, 8, 20, -2, 2, 3, 5, 21, 4, 6, 1, 0 }, + { 2, -2, -3, -9, -4, 2, 14, 16, 3, -6, 8, 24, 0 }, + { 2, 1, 5, -16, -7, 2, 3, 11, 15, -3, 11, 22, 0 }, + { 1, 2, 3, -11, -2, -5, 4, 8, 9, -3, -2, 26, 0 }, + { 0, -1, 10, -9, -1, -8, 2, 3, 4, 0, 0, 29, 0 }, + { 1, 2, 0, -5, 1, -9, 9, 3, 0, 1, -7, 20, 0 }, + { -2, 8, -6, -4, 3, -9, -8, 45, 14, 2, -13, 7, 0 }, + { 1, -1, 16, -19, -8, -4, -3, 2, 19, 0, 4, 30, 0 }, + { 1, 1, -3, 0, 2, -11, 15, -5, 1, 2, -9, 24, 0 }, + { 0, 1, -2, 0, 1, -4, 4, 0, 0, 1, -4, 7, 0 }, + { 0, 1, 2, -5, 1, -6, 4, 10, -2, 1, -4, 10, 0 }, + { 3, 0, 
-3, -6, -2, -6, 14, 8, -1, -1, -3, 31, 0 }, + { 0, 1, 0, -2, 1, -6, 5, 1, 0, 1, -5, 13, 0 }, + { 3, 1, 9, -19, -21, 9, 7, 6, 13, 5, 15, 21, 0 }, + { 2, 4, 3, -12, -13, 1, 7, 8, 3, 0, 12, 26, 0 }, + { 3, 1, -8, -2, 0, -6, 18, 2, -2, 3, -10, 23, 0 }, + { 1, 1, -4, -1, 1, -5, 8, 1, -1, 2, -5, 10, 0 }, + { 0, 1, -1, 0, 0, -2, 2, 0, 0, 1, -2, 3, 0 }, + { 1, 1, -2, -7, 1, -7, 14, 18, 0, 0, -7, 21, 0 }, + { 0, 1, 0, -2, 0, -7, 8, 1, -2, 0, -3, 24, 0 }, + { 0, 1, 1, -2, 2, -10, 10, 0, -2, 1, -7, 23, 0 }, + { 0, 2, 2, -11, 2, -4, -3, 39, 7, 1, -10, 9, 0 }, + { 1, 0, 13, -16, -5, -6, -1, 8, 6, 0, 6, 29, 0 }, + { 1, 3, 1, -6, -4, -7, 9, 6, -3, -2, 3, 33, 0 }, + { 4, 0, -17, -1, -1, 5, 26, 8, -2, 3, -15, 30, 0 }, + { 0, 1, -2, 0, 2, -8, 12, -6, 1, 1, -6, 16, 0 }, + { 0, 0, 0, -1, 1, -4, 4, 0, 0, 0, -3, 11, 0 }, + { 0, 1, 2, -8, 2, -6, 5, 15, 0, 2, -7, 9, 0 }, + { 1, -1, 12, -15, -7, -2, 3, 6, 6, -1, 7, 30, 0 }, +}; +const int AdaptiveLoopFilter::m_classToFilterMapping[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES] = +{ + { 8, 2, 2, 2, 3, 4, 53, 9, 9, 52, 4, 4, 5, 9, 2, 8, 10, 9, 1, 3, 39, 39, 10, 9, 52 }, + { 11, 12, 13, 14, 15, 30, 11, 17, 18, 19, 16, 20, 20, 4, 53, 21, 22, 23, 14, 25, 26, 26, 27, 28, 10 }, + { 16, 12, 31, 32, 14, 16, 30, 33, 53, 34, 35, 16, 20, 4, 7, 16, 21, 36, 18, 19, 21, 26, 37, 38, 39 }, + { 35, 11, 13, 14, 43, 35, 16, 4, 34, 62, 35, 35, 30, 56, 7, 35, 21, 38, 24, 40, 16, 21, 48, 57, 39 }, + { 11, 31, 32, 43, 44, 16, 4, 17, 34, 45, 30, 20, 20, 7, 5, 21, 22, 46, 40, 47, 26, 48, 63, 58, 10 }, + { 12, 13, 50, 51, 52, 11, 17, 53, 45, 9, 30, 4, 53, 19, 0, 22, 23, 25, 43, 44, 37, 27, 28, 10, 55 }, + { 30, 33, 62, 51, 44, 20, 41, 56, 34, 45, 20, 41, 41, 56, 5, 30, 56, 38, 40, 47, 11, 37, 42, 57, 8 }, + { 35, 11, 23, 32, 14, 35, 20, 4, 17, 18, 21, 20, 20, 20, 4, 16, 21, 36, 46, 25, 41, 26, 48, 49, 58 }, + { 12, 31, 59, 59, 3, 33, 33, 59, 59, 52, 4, 33, 17, 59, 55, 22, 36, 59, 59, 60, 22, 36, 59, 25, 55 }, + { 31, 25, 15, 60, 60, 22, 17, 19, 55, 55, 20, 20, 53, 19, 
55, 22, 46, 25, 43, 60, 37, 28, 10, 55, 52 }, + { 12, 31, 32, 50, 51, 11, 33, 53, 19, 45, 16, 4, 4, 53, 5, 22, 36, 18, 25, 43, 26, 27, 27, 28, 10 }, + { 5, 2, 44, 52, 3, 4, 53, 45, 9, 3, 4, 56, 5, 0, 2, 5, 10, 47, 52, 3, 63, 39, 10, 9, 52 }, + { 12, 34, 44, 44, 3, 56, 56, 62, 45, 9, 56, 56, 7, 5, 0, 22, 38, 40, 47, 52, 48, 57, 39, 10, 9 }, + { 35, 11, 23, 14, 51, 35, 20, 41, 56, 62, 16, 20, 41, 56, 7, 16, 21, 38, 24, 40, 26, 26, 42, 57, 39 }, + { 33, 34, 51, 51, 52, 41, 41, 34, 62, 0, 41, 41, 56, 7, 5, 56, 38, 38, 40, 44, 37, 42, 57, 39, 10 }, + { 16, 31, 32, 15, 60, 30, 4, 17, 19, 25, 22, 20, 4, 53, 19, 21, 22, 46, 25, 55, 26, 48, 63, 58, 55 }, +}; + +void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs) +{ // set clipping range m_clpRngs = cs.slice->getClpRngs(); @@ -81,9 +295,10 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { m_ctuEnableFlag[compIdx] = cs.picture->getAlfCtuEnableFlag( compIdx ); + m_ctuAlternative[compIdx] = cs.picture->getAlfCtuAlternativeData( compIdx ); } - reconstructCoeff( alfSliceParam, CHANNEL_TYPE_LUMA ); - reconstructCoeff( alfSliceParam, CHANNEL_TYPE_CHROMA ); + short* alfCtuFilterIndex = nullptr; + uint32_t lastSliceIdx = 0xFFFFFFFF; PelUnitBuf recYuv = cs.getRecoBuf(); m_tempBuf.copyFrom( recYuv ); @@ -93,20 +308,147 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic const PreCalcValues& pcv = *cs.pcv; int ctuIdx = 0; + bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { 0, 0, 0 }; + int verVirBndryPos[] = { 0, 0, 0 }; + for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight ) { for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth ) { + // get first CU in CTU + const CodingUnit *cu = cs.getCU( Position(xPos, yPos), CHANNEL_TYPE_LUMA ); + + // skip this CTU if ALF is disabled + if 
(!cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Y) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr)) + { + ctuIdx++; + continue; + } + + // reload ALF APS each time the slice changes during raster scan filtering + if(ctuIdx == 0 || lastSliceIdx != cu->slice->getSliceID() || alfCtuFilterIndex==nullptr) + { + cs.slice = cu->slice; + reconstructCoeffAPSs(cs, true, cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false); + alfCtuFilterIndex = cu->slice->getPic()->getAlfCtbFilterIndex(); + } + lastSliceIdx = cu->slice->getSliceID(); + const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth; const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight; + bool ctuEnableFlag = m_ctuEnableFlag[COMPONENT_Y][ctuIdx]; + for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ ) + { + ctuEnableFlag |= m_ctuEnableFlag[compIdx][ctuIdx] > 0; + } + int rasterSliceAlfPad = 0; + if( ctuEnableFlag && isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) ) + { + int yStart = yPos; + for( int i = 0; i <= numHorVirBndry; i++ ) + { + const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i]; + const int h = yEnd - yStart; + const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 ); + const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight ); + int xStart = xPos; + for( int j = 0; j <= numVerVirBndry; j++ ) + { + const int xEnd = j == numVerVirBndry ? 
xPos + width : verVirBndryPos[j]; + const int w = xEnd - xStart; + const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 ); + const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth ); + const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); + const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); + PelUnitBuf buf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) ); + buf.copyFrom( tmpYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) ); + // pad top-left unavailable samples for raster slice + if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 ); + } + + // pad bottom-right unavailable samples for raster slice + if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 ); + } + buf.extendBorderPel( MAX_ALF_PADDING_SIZE ); + buf = buf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 
0 : MAX_ALF_PADDING_SIZE, w, h ) ) ); + + if( m_ctuEnableFlag[COMPONENT_Y][ctuIdx] ) + { + const Area blkSrc( 0, 0, w, h ); + const Area blkDst( xStart, yStart, w, h ); + deriveClassification( m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc ); + short filterSetIndex = alfCtuFilterIndex[ctuIdx]; + short *coeff; + short *clip; + if (filterSetIndex >= NUM_FIXED_FILTER_SETS) + { + coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + } + else + { + coeff = m_fixedFilterSetCoeffDec[filterSetIndex]; + clip = m_clipDefault; + } + m_filter7x7Blk(m_classifier, recYuv, buf, blkDst, blkSrc, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs + , m_alfVBLumaCTUHeight + , m_alfVBLumaPos + ); + } + + for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ ) + { + ComponentID compID = ComponentID( compIdx ); + const int chromaScaleX = getComponentScaleX( compID, tmpYuv.chromaFormat ); + const int chromaScaleY = getComponentScaleY( compID, tmpYuv.chromaFormat ); + + if( m_ctuEnableFlag[compIdx][ctuIdx] ) + { + const Area blkSrc( 0, 0, w >> chromaScaleX, h >> chromaScaleY ); + const Area blkDst( xStart >> chromaScaleX, yStart >> chromaScaleY, w >> chromaScaleX, h >> chromaScaleY ); + uint8_t alt_num = m_ctuAlternative[compIdx][ctuIdx]; + m_filter5x5Blk(m_classifier, recYuv, buf, blkDst, blkSrc, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs + , m_alfVBChmaCTUHeight + , m_alfVBChmaPos ); + } + } + + xStart = xEnd; + } + + yStart = yEnd; + } + } + else + { const UnitArea area( cs.area.chromaFormat, Area( xPos, yPos, width, height ) ); if( m_ctuEnableFlag[COMPONENT_Y][ctuIdx] ) { Area blk( xPos, yPos, width, height ); - deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk ); - Area blkPCM(xPos, yPos, width, height); - resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM); - m_filter7x7Blk(m_classifier, recYuv, tmpYuv, 
blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs ); + deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk, blk ); + short filterSetIndex = alfCtuFilterIndex[ctuIdx]; + short *coeff; + short *clip; + if (filterSetIndex >= NUM_FIXED_FILTER_SETS) + { + coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + } + else + { + coeff = m_fixedFilterSetCoeffDec[filterSetIndex]; + clip = m_clipDefault; + } + m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, blk, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs + , m_alfVBLumaCTUHeight + , m_alfVBLumaPos + ); } for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ ) @@ -118,71 +460,116 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic if( m_ctuEnableFlag[compIdx][ctuIdx] ) { Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY ); - - m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs ); + uint8_t alt_num = m_ctuAlternative[compIdx][ctuIdx]; + m_filter5x5Blk(m_classifier, recYuv, tmpYuv, blk, blk, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs + , m_alfVBChmaCTUHeight + , m_alfVBChmaPos); } } + } ctuIdx++; } } } -void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, ChannelType channel, const bool bRedo ) +void AdaptiveLoopFilter::reconstructCoeffAPSs(CodingStructure& cs, bool luma, bool chroma, bool isRdo) { - int factor = ( 1 << ( m_NUM_BITS - 1 ) ); - AlfFilterType filterType = isLuma( channel ) ? ALF_FILTER_7 : ALF_FILTER_5; - int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1; - int numCoeff = filterType == ALF_FILTER_5 ? 7 : 13; - int numCoeffMinus1 = numCoeff - 1; - int numFilters = isLuma( channel ) ? alfSliceParam.numLumaFilters : 1; - short* coeff = isLuma( channel ) ? 
alfSliceParam.lumaCoeff : alfSliceParam.chromaCoeff; - - if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag && isLuma( channel ) ) + //luma + APS** aps = cs.slice->getAlfAPSs(); + AlfParam alfParamTmp; + APS* curAPS; + if (luma) { - for( int i = 1; i < numFilters; i++ ) + for (int i = 0; i < cs.slice->getTileGroupNumAps(); i++) { - for( int j = 0; j < numCoeffMinus1; j++ ) - { - coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += coeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j]; - } + int apsIdx = cs.slice->getTileGroupApsIdLuma()[i]; + curAPS = aps[apsIdx]; + CHECK(curAPS == NULL, "invalid APS"); + alfParamTmp = curAPS->getAlfAPSParam(); + reconstructCoeff(alfParamTmp, CHANNEL_TYPE_LUMA, isRdo, true); + memcpy(m_coeffApsLuma[i], m_coeffFinal, sizeof(m_coeffFinal)); + memcpy(m_clippApsLuma[i], m_clippFinal, sizeof(m_clippFinal)); } } - for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ ) + //chroma + if (chroma) { - int sum = 0; - for( int i = 0; i < numCoeffMinus1; i++ ) - { - sum += ( coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + i] << 1 ); - } - coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor - sum; + int apsIdxChroma = cs.slice->getTileGroupApsIdChroma(); + curAPS = aps[apsIdxChroma]; + m_alfParamChroma = &curAPS->getAlfAPSParam(); + alfParamTmp = *m_alfParamChroma; + reconstructCoeff(alfParamTmp, CHANNEL_TYPE_CHROMA, isRdo, true); } +} - if( isChroma( channel ) ) - { - return; - } +void AdaptiveLoopFilter::reconstructCoeff( AlfParam& alfParam, ChannelType channel, const bool isRdo, const bool isRedo ) +{ + int factor = isRdo ? 0 : (1 << (m_NUM_BITS - 1)); + AlfFilterType filterType = isLuma( channel ) ? ALF_FILTER_7 : ALF_FILTER_5; + int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1; + int numCoeff = filterType == ALF_FILTER_5 ? 7 : 13; + int numCoeffMinus1 = numCoeff - 1; + const int numAlts = isLuma( channel ) ? 
1 : alfParam.numAlternativesChroma; - for( int classIdx = 0; classIdx < numClasses; classIdx++ ) + for( int altIdx = 0; altIdx < numAlts; ++ altIdx ) { - int filterIdx = alfSliceParam.filterCoeffDeltaIdx[classIdx]; - memcpy( m_coeffFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF, coeff + filterIdx * MAX_NUM_ALF_LUMA_COEFF, sizeof( short ) * numCoeff ); - } + int numFilters = isLuma( channel ) ? alfParam.numLumaFilters : 1; + short* coeff = isLuma( channel ) ? alfParam.lumaCoeff : alfParam.chromaCoeff[altIdx]; + short* clipp = isLuma( channel ) ? alfParam.lumaClipp : alfParam.chromaClipp[altIdx]; - if( bRedo && alfSliceParam.alfLumaCoeffDeltaPredictionFlag ) - { - for( int i = numFilters - 1; i > 0; i-- ) + for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ ) + { + coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor; + } + + if( isChroma( channel ) ) { - for( int j = 0; j < numCoeffMinus1; j++ ) + for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx ) { - coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] = coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] - coeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j]; + m_chromaCoeffFinal[altIdx][coeffIdx] = coeff[coeffIdx]; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + int clipIdx = alfParam.nonLinearFlag[channel] ? clipp[coeffIdx] : 0; +#else + int clipIdx = alfParam.nonLinearFlag[channel][altIdx] ? clipp[coeffIdx] : 0; +#endif + m_chromaClippFinal[altIdx][coeffIdx] = isRdo ? clipIdx : m_alfClippingValues[channel][clipIdx]; + } + m_chromaCoeffFinal[altIdx][numCoeffMinus1] = factor; + m_chromaClippFinal[altIdx][numCoeffMinus1] = isRdo ? 
0 : m_alfClippingValues[channel][0]; + continue; + } + for( int classIdx = 0; classIdx < numClasses; classIdx++ ) + { + int filterIdx = alfParam.filterCoeffDeltaIdx[classIdx]; + + CHECK(!(filterIdx >= 0 && filterIdx < alfParam.numLumaFilters), "Bad coeff delta idx in ALF"); + for (int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx) + { + m_coeffFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] = coeff[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx]; + } + m_coeffFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor; + m_clippFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = isRdo ? 0 : m_alfClippingValues[channel][0]; + for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx ) + { +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + int clipIdx = alfParam.nonLinearFlag[channel] ? clipp[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] : 0; +#else + int clipIdx = alfParam.nonLinearFlag[channel][altIdx] ? clipp[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] : 0; +#endif + CHECK(!(clipIdx >= 0 && clipIdx < MaxAlfNumClippingValues), "Bad clip idx in ALF"); + m_clippFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] = isRdo ? clipIdx : m_alfClippingValues[channel][clipIdx]; } + m_clippFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = + isRdo ? 
0 : + m_alfClippingValues[channel][0]; } } } void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const ChromaFormat format, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE] ) { + destroy(); std::memcpy( m_inputBitDepth, inputBitDepth, sizeof( m_inputBitDepth ) ); m_picWidth = picWidth; m_picHeight = picHeight; @@ -196,68 +583,87 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const m_numCTUsInPic = m_numCTUsInHeight * m_numCTUsInWidth; m_filterShapes[CHANNEL_TYPE_LUMA].push_back( AlfFilterShape( 7 ) ); m_filterShapes[CHANNEL_TYPE_CHROMA].push_back( AlfFilterShape( 5 ) ); + m_alfVBLumaPos = m_maxCUHeight - ALF_VB_POS_ABOVE_CTUROW_LUMA; + m_alfVBChmaPos = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; - m_tempBuf.destroy(); - m_tempBuf.create( format, Area( 0, 0, picWidth, picHeight ), maxCUWidth, MAX_ALF_FILTER_LENGTH >> 1, 0, false ); + m_alfVBLumaCTUHeight = m_maxCUHeight; + m_alfVBChmaCTUHeight = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 
1 : 0)); - // Laplacian based activity - for( int i = 0; i < NUM_DIRECTIONS; i++ ) + static_assert( AlfNumClippingValues[CHANNEL_TYPE_LUMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_LUMA] must be at least one" ); + for( int i = 0; i < AlfNumClippingValues[CHANNEL_TYPE_LUMA]; ++i ) { - if ( m_laplacian[i] == nullptr ) - { - m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5]; + m_alfClippingValues[CHANNEL_TYPE_LUMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_LUMA] - 2.35*i)) ); + } + static_assert( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_CHROMA] must be at least one" ); + m_alfClippingValues[CHANNEL_TYPE_CHROMA][0] = 1 << m_inputBitDepth[CHANNEL_TYPE_CHROMA]; + for( int i = 1; i < AlfNumClippingValues[CHANNEL_TYPE_CHROMA]; ++i ) + { + m_alfClippingValues[CHANNEL_TYPE_CHROMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_CHROMA] - 2.35*i)) ); + } - for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ ) - { - m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5]; - } - } + if (m_created) + { + return; } + m_tempBuf.destroy(); + m_tempBuf.create( format, Area( 0, 0, picWidth, picHeight ), maxCUWidth, MAX_ALF_FILTER_LENGTH >> 1, 0, false ); + m_tempBuf2.destroy(); + m_tempBuf2.create( format, Area( 0, 0, maxCUWidth + (MAX_ALF_PADDING_SIZE << 1), maxCUHeight + (MAX_ALF_PADDING_SIZE << 1) ), maxCUWidth, MAX_ALF_PADDING_SIZE, 0, false ); + // Classification if ( m_classifier == nullptr ) { m_classifier = new AlfClassifier*[picHeight]; - for( int i = 0; i < picHeight; i++ ) + m_classifier[0] = new AlfClassifier[picWidth * picHeight]; + + for (int i = 1; i < picHeight; i++) { - m_classifier[i] = new AlfClassifier[picWidth]; + m_classifier[i] = m_classifier[0] + i * picWidth; } } -} -void AdaptiveLoopFilter::destroy() -{ - for( int i = 0; i < NUM_DIRECTIONS; i++ ) + for (int filterSetIndex = 0; filterSetIndex < NUM_FIXED_FILTER_SETS; filterSetIndex++) { - if( 
m_laplacian[i] ) + for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++) { - for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ ) + int fixedFilterIdx = m_classToFilterMapping[filterSetIndex][classIdx]; + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF - 1; i++) { - delete[] m_laplacian[i][y]; - m_laplacian[i][y] = nullptr; + m_fixedFilterSetCoeffDec[filterSetIndex][classIdx * MAX_NUM_ALF_LUMA_COEFF + i] = m_fixedFilterSetCoeff[fixedFilterIdx][i]; } - - delete[] m_laplacian[i]; - m_laplacian[i] = nullptr; + m_fixedFilterSetCoeffDec[filterSetIndex][classIdx * MAX_NUM_ALF_LUMA_COEFF + MAX_NUM_ALF_LUMA_COEFF - 1] = (1 << (m_NUM_BITS - 1)); } } + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES; i++) + { + m_clipDefault[i] = m_alfClippingValues[CHANNEL_TYPE_LUMA][0]; + } + m_created = true; +} - if( m_classifier ) +void AdaptiveLoopFilter::destroy() +{ + if (!m_created) { - for( int i = 0; i < m_picHeight; i++ ) - { - delete[] m_classifier[i]; - m_classifier[i] = nullptr; - } + return; + } + if( m_classifier ) + { + delete[] m_classifier[0]; delete[] m_classifier; m_classifier = nullptr; } m_tempBuf.destroy(); + m_tempBuf2.destroy(); + m_filterShapes[CHANNEL_TYPE_LUMA].clear(); + m_filterShapes[CHANNEL_TYPE_CHROMA].clear(); + m_created = false; } -void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk ) +void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blkDst, const Area& blk ) { int height = blk.pos().y + blk.height; int width = blk.pos().x + blk.width; @@ -269,63 +675,20 @@ void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const for( int j = blk.pos().x; j < width; j += m_CLASSIFICATION_BLK_SIZE ) { int nWidth = std::min( j + m_CLASSIFICATION_BLK_SIZE, width ) - j; - - m_deriveClassificationBlk( classifier, m_laplacian, srcLuma, Area( j, i, nWidth, nHeight ), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 
4 ); + m_deriveClassificationBlk(classifier, m_laplacian, srcLuma, Area( j - blk.pos().x + blkDst.pos().x, i - blk.pos().y + blkDst.pos().y, nWidth, nHeight ), Area(j, i, nWidth, nHeight), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4 + , m_alfVBLumaCTUHeight + , m_alfVBLumaPos + ); } } } -void AdaptiveLoopFilter::resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk) -{ - if ( !cs.sps->getPCMFilterDisableFlag() ) - { - return; - } - - int height = blk.pos().y + blk.height; - int width = blk.pos().x + blk.width; - const int clsSizeY = 4; - const int clsSizeX = 4; - int classIdx = m_ALF_UNUSED_CLASSIDX; - int transposeIdx = m_ALF_UNUSED_TRANSPOSIDX; - for( int i = blk.pos().y; i < height; i += m_CLASSIFICATION_BLK_SIZE ) - { - int nHeight = std::min(i + m_CLASSIFICATION_BLK_SIZE, height) - i; - - for( int j = blk.pos().x; j < width; j += m_CLASSIFICATION_BLK_SIZE ) - { - int nWidth = std::min(j + m_CLASSIFICATION_BLK_SIZE, width) - j; - int posX = j; - int posY = i; - - for( int subi = 0; subi < nHeight; subi += clsSizeY ) - { - for( int subj = 0; subj < nWidth; subj += clsSizeX ) - { - int yOffset = subi + posY; - int xOffset = subj + posX; - Position pos(xOffset, yOffset); - - const CodingUnit* cu = cs.getCU(pos, CH_L); - if ( cu->ipcm ) - { - AlfClassifier *cl0 = classifier[yOffset] + xOffset; - AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset; - AlfClassifier *cl2 = classifier[yOffset + 2] + xOffset; - AlfClassifier *cl3 = classifier[yOffset + 3] + xOffset; - cl0[0] = cl0[1] = cl0[2] = cl0[3] = - cl1[0] = cl1[1] = cl1[2] = cl1[3] = - cl2[0] = cl2[1] = cl2[2] = cl2[3] = - cl3[0] = cl3[1] = cl3[2] = cl3[3] = AlfClassifier(classIdx, transposeIdx); - } - } - } - } - } -} - -void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift ) +void AdaptiveLoopFilter::deriveClassificationBlk(AlfClassifier 
**classifier, int **laplacian[NUM_DIRECTIONS], + const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, + const int shift, const int vbCTUHeight, int vbPos) { + CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2"); + static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; const int stride = srcLuma.stride; const Pel* src = srcLuma.buf; @@ -352,6 +715,15 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in const Pel *src2 = &src[yoffset + stride]; const Pel *src3 = &src[yoffset + stride * 2]; + const int y = blkDst.pos().y - 2 + i; + if (y > 0 && (y & (vbCTUHeight - 1)) == vbPos - 2) + { + src3 = &src[yoffset + stride]; + } + else if (y > 0 && (y & (vbCTUHeight - 1)) == vbPos) + { + src0 = &src[yoffset]; + } int* pYver = laplacian[VER][i]; int* pYhor = laplacian[HOR][i]; int* pYdig0 = laplacian[DIAG0][i]; @@ -378,7 +750,6 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in int jM6 = j - 6; int jM4 = j - 4; int jM2 = j - 2; - pYver[jM6] += pYver[jM4] + pYver[jM2] + pYver[j]; pYhor[jM6] += pYhor[jM4] + pYhor[jM2] + pYhor[j]; pYdig0[jM6] += pYdig0[jM4] + pYdig0[jM2] + pYdig0[j]; @@ -415,13 +786,41 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in for( int j = 0; j < blk.width; j += clsSizeX ) { - int sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j]; - int sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j]; - int sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j]; - int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j]; + int sumV = 0; int sumH = 0; int sumD0 = 0; int sumD1 = 0; + if (((i + blkDst.pos().y) % vbCTUHeight) == (vbPos - 4)) + { + sumV = pYver[j] + pYver2[j] + pYver4[j]; + sumH = pYhor[j] + pYhor2[j] + pYhor4[j]; + sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j]; + sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j]; + } + else if (((i + blkDst.pos().y) % vbCTUHeight) == vbPos) + { + sumV = pYver2[j] + 
pYver4[j] + pYver6[j]; + sumH = pYhor2[j] + pYhor4[j] + pYhor6[j]; + sumD0 = pYdig02[j] + pYdig04[j] + pYdig06[j]; + sumD1 = pYdig12[j] + pYdig14[j] + pYdig16[j]; + } + else + { + sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j]; + sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j]; + sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j]; + sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j]; + } int tempAct = sumV + sumH; - int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 64 ) >> shift ); + int activity = 0; + + const int y = (i + blkDst.pos().y) & (vbCTUHeight - 1); + if (y == vbPos - 4 || y == vbPos) + { + activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 96) >> shift); + } + else + { + activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 64) >> shift); + } int classIdx = th[activity]; int hv1, hv0, d1, d0, hvd1, hvd0; @@ -450,7 +849,7 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in d0 = sumD0; dirTempD = 2; } - if( d1*hv0 > hv1*d0 ) + if( (uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0 ) { hvd1 = d1; hvd0 = d0; @@ -483,8 +882,8 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in static const int transposeTable[8] = { 0, 1, 0, 2, 2, 3, 1, 3 }; int transposeIdx = transposeTable[mainDirection * 2 + ( secondaryDirection >> 1 )]; - int yOffset = i + posY; - int xOffset = j + posX; + int yOffset = i + blkDst.pos().y; + int xOffset = j + blkDst.pos().x; AlfClassifier *cl0 = classifier[yOffset] + xOffset; AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset; @@ -496,17 +895,18 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in } template<AlfFilterType filtType> -void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs ) +void AdaptiveLoopFilter::filterBlk(AlfClassifier 
**classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, + const short *filterSet, const short *fClipSet, const ClpRng &clpRng, + CodingStructure &cs, const int vbCTUHeight, int vbPos) { + CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2"); + const bool bChroma = isChroma( compId ); if( bChroma ) { CHECK( filtType != 0, "Chroma needs to have filtType == 0" ); } - const SPS* sps = cs.slice->getSPS(); - bool isDualTree =CS::isDualITree(cs); - bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag(); - ChromaFormat nChromaFormat = sps->getChromaFormatIdc(); const CPelBuf srcLuma = recSrc.get( compId ); PelBuf dstLuma = recDst.get( compId ); @@ -520,12 +920,13 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf const int endWidth = blk.x + blk.width; const Pel* src = srcLuma.buf; - Pel* dst = dstLuma.buf + startHeight * dstStride; + Pel* dst = dstLuma.buf + blkDst.y * dstStride; const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5, *pImgYPad6; const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6; - short *coef = filterSet; + const short *coef = filterSet; + const short *clip = fClipSet; const int shift = m_NUM_BITS - 1; @@ -535,7 +936,6 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf const int clsSizeY = 4; const int clsSizeX = 4; - bool pcmFlags2x2[4] = {0,0,0,0}; CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" ); CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" ); @@ -547,7 +947,8 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf int dstStride2 = dstStride * clsSizeY; int srcStride2 = srcStride * clsSizeY; - std::vector<Pel> filterCoeff( MAX_NUM_ALF_LUMA_COEFF ); + std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterCoeff; + std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterClipp; pImgYPad0 = src + 
startHeight * srcStride + startWidth; pImgYPad1 = pImgYPad0 + srcStride; @@ -557,14 +958,14 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf pImgYPad5 = pImgYPad3 + srcStride; pImgYPad6 = pImgYPad4 - srcStride; - Pel* pRec0 = dst + startWidth; + Pel* pRec0 = dst + blkDst.x; Pel* pRec1 = pRec0 + dstStride; for( int i = 0; i < endHeight - startHeight; i += clsSizeY ) { if( !bChroma ) { - pClass = classifier[startHeight + i] + startWidth; + pClass = classifier[blkDst.y + i] + blkDst.x; } for( int j = 0; j < endWidth - startWidth; j += clsSizeX ) @@ -573,54 +974,31 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf { AlfClassifier& cl = pClass[j]; transposeIdx = cl.transposeIdx; - if( isPCMFilterDisabled && cl.classIdx== m_ALF_UNUSED_CLASSIDX && transposeIdx== m_ALF_UNUSED_TRANSPOSIDX ) - { - continue; - } coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF; - } - else if( isPCMFilterDisabled ) - { - int blkX, blkY; - bool *flags = pcmFlags2x2; - - // check which chroma 2x2 blocks use PCM - // chroma PCM may not be aligned with 4x4 ALF processing grid - for( blkY=0; blkY<4; blkY+=2 ) - { - for( blkX=0; blkX<4; blkX+=2 ) - { - Position pos(j+startWidth+blkX, i+startHeight+blkY); - CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L); - *flags++ = cu->ipcm ? 
1 : 0; - } - } - - // skip entire 4x4 if all chroma 2x2 blocks use PCM - if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] ) - { - continue; - } + clip = fClipSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF; } - if( filtType == ALF_FILTER_7 ) { if( transposeIdx == 1 ) { filterCoeff = { coef[9], coef[4], coef[10], coef[8], coef[1], coef[5], coef[11], coef[7], coef[3], coef[0], coef[2], coef[6], coef[12] }; + filterClipp = { clip[9], clip[4], clip[10], clip[8], clip[1], clip[5], clip[11], clip[7], clip[3], clip[0], clip[2], clip[6], clip[12] }; } else if( transposeIdx == 2 ) { filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[8], coef[7], coef[6], coef[5], coef[4], coef[9], coef[10], coef[11], coef[12] }; + filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[8], clip[7], clip[6], clip[5], clip[4], clip[9], clip[10], clip[11], clip[12] }; } else if( transposeIdx == 3 ) { filterCoeff = { coef[9], coef[8], coef[10], coef[4], coef[3], coef[7], coef[11], coef[5], coef[1], coef[0], coef[2], coef[6], coef[12] }; + filterClipp = { clip[9], clip[8], clip[10], clip[4], clip[3], clip[7], clip[11], clip[5], clip[1], clip[0], clip[2], clip[6], clip[12] }; } else { filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6], coef[7], coef[8], coef[9], coef[10], coef[11], coef[12] }; + filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6], clip[7], clip[8], clip[9], clip[10], clip[11], clip[12] }; } } else @@ -628,18 +1006,22 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf if( transposeIdx == 1 ) { filterCoeff = { coef[4], coef[1], coef[5], coef[3], coef[0], coef[2], coef[6] }; + filterClipp = { clip[4], clip[1], clip[5], clip[3], clip[0], clip[2], clip[6] }; } else if( transposeIdx == 2 ) { filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[4], coef[5], coef[6] }; + filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[4], clip[5], clip[6] }; } else if( transposeIdx 
== 3 ) { filterCoeff = { coef[4], coef[3], coef[5], coef[1], coef[0], coef[2], coef[6] }; + filterClipp = { clip[4], clip[3], clip[5], clip[1], clip[0], clip[2], clip[6] }; } else { filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6] }; + filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6] }; } } @@ -655,59 +1037,79 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf pRec1 = pRec0 + j + ii * dstStride; + const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1); + if (yVb < vbPos && (yVb >= vbPos - (bChroma ? 2 : 4))) // above + { + pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1; + pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3; + pImg5 = (yVb >= vbPos - 3) ? pImg3 : pImg5; + + pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2; + pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4; + pImg6 = (yVb >= vbPos - 3) ? pImg4 : pImg6; + } + else if (yVb >= vbPos && (yVb <= vbPos + (bChroma ? 1 : 3))) // bottom + { + pImg2 = (yVb == vbPos) ? pImg0 : pImg2; + pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4; + pImg6 = (yVb <= vbPos + 2) ? pImg4 : pImg6; + + pImg1 = (yVb == vbPos) ? pImg0 : pImg1; + pImg3 = (yVb <= vbPos + 1) ? pImg1 : pImg3; + pImg5 = (yVb <= vbPos + 2) ? 
pImg3 : pImg5; + } +#if JVET_Q0150 + bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1); + bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos); +#endif for( int jj = 0; jj < clsSizeX; jj++ ) { - // skip 2x2 PCM chroma blocks - if( bChroma && isPCMFilterDisabled ) - { - if( pcmFlags2x2[2*(ii>>1) + (jj>>1)] ) - { - pImg0++; - pImg1++; - pImg2++; - pImg3++; - pImg4++; - pImg5++; - pImg6++; - continue; - } - } int sum = 0; + const Pel curr = pImg0[+0]; if( filtType == ALF_FILTER_7 ) { - sum += filterCoeff[0] * ( pImg5[0] + pImg6[0] ); - - sum += filterCoeff[1] * ( pImg3[+1] + pImg4[-1] ); - sum += filterCoeff[2] * ( pImg3[+0] + pImg4[+0] ); - sum += filterCoeff[3] * ( pImg3[-1] + pImg4[+1] ); - - sum += filterCoeff[4] * ( pImg1[+2] + pImg2[-2] ); - sum += filterCoeff[5] * ( pImg1[+1] + pImg2[-1] ); - sum += filterCoeff[6] * ( pImg1[+0] + pImg2[+0] ); - sum += filterCoeff[7] * ( pImg1[-1] + pImg2[+1] ); - sum += filterCoeff[8] * ( pImg1[-2] + pImg2[+2] ); - - sum += filterCoeff[9] * ( pImg0[+3] + pImg0[-3] ); - sum += filterCoeff[10] * ( pImg0[+2] + pImg0[-2] ); - sum += filterCoeff[11] * ( pImg0[+1] + pImg0[-1] ); - sum += filterCoeff[12] * ( pImg0[+0] ); + sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg5[+0], pImg6[+0]) ); + + sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg3[+1], pImg4[-1]) ); + sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg3[+0], pImg4[+0]) ); + sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg3[-1], pImg4[+1]) ); + + sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg1[+2], pImg2[-2]) ); + sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg1[+1], pImg2[-1]) ); + sum += filterCoeff[6] * ( clipALF(filterClipp[6], curr, pImg1[+0], pImg2[+0]) ); + sum += filterCoeff[7] * ( clipALF(filterClipp[7], curr, pImg1[-1], pImg2[+1]) ); + sum += filterCoeff[8] * ( clipALF(filterClipp[8], curr, pImg1[-2], pImg2[+2]) ); + + sum += filterCoeff[9] * ( clipALF(filterClipp[9], curr, pImg0[+3], 
pImg0[-3]) ); + sum += filterCoeff[10] * ( clipALF(filterClipp[10], curr, pImg0[+2], pImg0[-2]) ); + sum += filterCoeff[11] * ( clipALF(filterClipp[11], curr, pImg0[+1], pImg0[-1]) ); } else { - sum += filterCoeff[0] * ( pImg3[+0] + pImg4[+0] ); + sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg3[+0], pImg4[+0]) ); - sum += filterCoeff[1] * ( pImg1[+1] + pImg2[-1] ); - sum += filterCoeff[2] * ( pImg1[+0] + pImg2[+0] ); - sum += filterCoeff[3] * ( pImg1[-1] + pImg2[+1] ); + sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg1[+1], pImg2[-1]) ); + sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg1[+0], pImg2[+0]) ); + sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg1[-1], pImg2[+1]) ); - sum += filterCoeff[4] * ( pImg0[+2] + pImg0[-2] ); - sum += filterCoeff[5] * ( pImg0[+1] + pImg0[-1] ); - sum += filterCoeff[6] * ( pImg0[+0] ); + sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg0[+2], pImg0[-2]) ); + sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg0[+1], pImg0[-1]) ); } - +#if JVET_Q0150 + if (!(isNearVBabove || isNearVBbelow)) + { + sum = (sum + offset) >> shift; + } + else + { + sum = (sum + offset) >> (shift + 3); + } +#else sum = ( sum + offset ) >> shift; +#endif + sum += curr; pRec1[jj] = ClipPel( sum, clpRng ); pImg0++; diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h index 92928fee096080ce0efaf7ab6c5eaef23aae135e..f93fd8e6c72fb1a6fc1b391c4da527acf33945cd 100644 --- a/source/Lib/CommonLib/AdaptiveLoopFilter.h +++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -42,6 +42,7 @@ #include "Unit.h" #include "UnitTools.h" + struct AlfClassifier { AlfClassifier() {} @@ -66,6 +67,14 @@ enum Direction class AdaptiveLoopFilter { public: + static inline int clipALF(const int clip, const short ref, const short val0, const short val1) + { + return Clip3<int>(-clip, +clip, val0-ref) + Clip3<int>(-clip, +clip, val1-ref); + } + + static constexpr int AlfNumClippingValues[MAX_NUM_CHANNEL_TYPE] = { 4, 4 }; + static constexpr int MaxAlfNumClippingValues = 4; + static constexpr int m_NUM_BITS = 8; static constexpr int m_CLASSIFICATION_BLK_SIZE = 32; //non-normative, local buffer size static constexpr int m_ALF_UNUSED_CLASSIDX = 255; @@ -73,24 +82,32 @@ public: AdaptiveLoopFilter(); virtual ~AdaptiveLoopFilter() {} - - void ALFProcess( CodingStructure& cs, AlfSliceParam& alfSliceParam ); - void reconstructCoeff( AlfSliceParam& alfSliceParam, ChannelType channel, const bool bRedo = false ); + void reconstructCoeffAPSs(CodingStructure& cs, bool luma, bool chroma, bool isRdo); + void reconstructCoeff(AlfParam& alfParam, ChannelType channel, const bool isRdo, const bool isRedo = false); + void ALFProcess(CodingStructure& cs); void create( const int picWidth, const int picHeight, const ChromaFormat format, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE] ); void destroy(); - static void deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift ); - void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk ); - void resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk); + static void deriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS], + const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const 
int shift, + const int vbCTUHeight, int vbPos); + void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blkDst, const Area& blk ); template<AlfFilterType filtType> - static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs ); - inline static int getMaxGolombIdx( AlfFilterType filterType ) - { - return filterType == ALF_FILTER_5 ? 2 : 3; - } + static void filterBlk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet, + const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight, + int vbPos); + void (*m_deriveClassificationBlk)(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS], const CPelBuf &srcLuma, + const Area &blkDst, const Area &blk, const int shift, const int vbCTUHeight, + int vbPos); - void( *m_deriveClassificationBlk )( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift ); - void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs ); - void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs ); + void (*m_filter5x5Blk)(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet, + const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight, + int vbPos); + void (*m_filter7x7Blk)(AlfClassifier **classifier, const PelUnitBuf 
&recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet, + const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight, + int vbPos); #ifdef TARGET_SIMD_X86 void initAdaptiveLoopFilterX86(); @@ -99,12 +116,29 @@ public: #endif protected: + bool isCrossedByVirtualBoundaries( const CodingStructure& cs, const int xPos, const int yPos, const int width, const int height, bool& clipTop, bool& clipBottom, bool& clipLeft, bool& clipRight, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], int& rasterSliceAlfPad ); + static const int m_classToFilterMapping[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES]; + static const int m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF]; + short m_fixedFilterSetCoeffDec[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; + short m_coeffApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES]; + short m_clippApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES]; + short m_clipDefault[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; + bool m_created = false; + short m_chromaCoeffFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; + AlfParam* m_alfParamChroma; + Pel m_alfClippingValues[MAX_NUM_CHANNEL_TYPE][MaxAlfNumClippingValues]; std::vector<AlfFilterShape> m_filterShapes[MAX_NUM_CHANNEL_TYPE]; AlfClassifier** m_classifier; short m_coeffFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; + short m_clippFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; + short m_chromaClippFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; int** m_laplacian[NUM_DIRECTIONS]; - uint8_t* m_ctuEnableFlag[MAX_NUM_COMPONENT]; + int * m_laplacianPtr[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5]; + int m_laplacianData[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5][m_CLASSIFICATION_BLK_SIZE + 5]; + uint8_t* 
m_ctuEnableFlag[MAX_NUM_COMPONENT]; + uint8_t* m_ctuAlternative[MAX_NUM_COMPONENT]; PelStorage m_tempBuf; + PelStorage m_tempBuf2; int m_inputBitDepth[MAX_NUM_CHANNEL_TYPE]; int m_picWidth; int m_picHeight; @@ -114,6 +148,10 @@ protected: int m_numCTUsInWidth; int m_numCTUsInHeight; int m_numCTUsInPic; + int m_alfVBLumaPos; + int m_alfVBChmaPos; + int m_alfVBLumaCTUHeight; + int m_alfVBChmaCTUHeight; ChromaFormat m_chromaFormat; ClpRngs m_clpRngs; }; diff --git a/source/Lib/CommonLib/AffineGradientSearch.cpp b/source/Lib/CommonLib/AffineGradientSearch.cpp index d91e938d9d18026868d9fced6d4795fc9dd834a5..90d939ac738f2a25b125f1952e300b6e4fa5f857 100644 --- a/source/Lib/CommonLib/AffineGradientSearch.cpp +++ b/source/Lib/CommonLib/AffineGradientSearch.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/AffineGradientSearch.h b/source/Lib/CommonLib/AffineGradientSearch.h index 40adbcdf0189c5c5d4191349a9407f3d4b5da588..380db32074491e625fce3d06ec4e02bd18e0ab9f 100644 --- a/source/Lib/CommonLib/AffineGradientSearch.h +++ b/source/Lib/CommonLib/AffineGradientSearch.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h new file mode 100644 index 0000000000000000000000000000000000000000..abaef3a4968ab6d5dd6535656592652056abe129 --- /dev/null +++ b/source/Lib/CommonLib/AlfParameters.h @@ -0,0 +1,236 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file AlfParameters.h + \brief Define types for storing ALF parameters +*/ + +#ifndef __ALFPARAMETERS__ +#define __ALFPARAMETERS__ + +#include <vector> +#include "CommonDef.h" + +//! \ingroup AlfParameters +//! \{ + +enum AlfFilterType +{ + ALF_FILTER_5, + ALF_FILTER_7, + ALF_NUM_OF_FILTER_TYPES +}; + +struct AlfFilterShape +{ + AlfFilterShape( int size ) + : filterLength( size ), + numCoeff( size * size / 4 + 1 ), + filterSize( size * size / 2 + 1 ) + { + if( size == 5 ) + { + pattern = { + 0, + 1, 2, 3, + 4, 5, 6, 5, 4, + 3, 2, 1, + 0 + }; + + weights = { + 2, + 2, 2, 2, + 2, 2, 1, 1 + }; + + filterType = ALF_FILTER_5; + } + else if( size == 7 ) + { + pattern = { + 0, + 1, 2, 3, + 4, 5, 6, 7, 8, + 9, 10, 11, 12, 11, 10, 9, + 8, 7, 6, 5, 4, + 3, 2, 1, + 0 + }; + + weights = { + 2, + 2, 2, 2, + 2, 2, 2, 2, 2, + 2, 2, 2, 1, 1 + }; + + filterType = ALF_FILTER_7; + } + else + { + filterType = ALF_NUM_OF_FILTER_TYPES; + CHECK( 0, "Wrong ALF filter shape" ); + } + } + + AlfFilterType filterType; + int filterLength; + int numCoeff; //TO DO: check whether we need both numCoeff and filterSize + int filterSize; + std::vector<int> pattern; + std::vector<int> weights; +}; + +struct AlfParam +{ + bool enabledFlag[MAX_NUM_COMPONENT]; // alf_slice_enable_flag, alf_chroma_idc +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + bool nonLinearFlag[MAX_NUM_CHANNEL_TYPE]; // alf_[luma/chroma]_clip_flag +#else + bool 
nonLinearFlag[MAX_NUM_CHANNEL_TYPE][MAX_NUM_ALF_ALTERNATIVES_CHROMA]; // alf_[luma/chroma]_clip_flag +#endif + short lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j] + short lumaClipp[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_clipp_luma_[i][j] + int numAlternativesChroma; // alf_chroma_num_alts_minus_one + 1 + short chromaCoeff[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_coeff_chroma[i] + short chromaClipp[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_clipp_chroma[i] + short filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES]; // filter_coeff_delta[i] + bool alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES]; // alf_luma_coeff_flag[i] + int numLumaFilters; // number_of_filters_minus1 + 1 + bool alfLumaCoeffDeltaFlag; // alf_luma_coeff_delta_flag + std::vector<AlfFilterShape>* filterShapes; + bool newFilterFlag[MAX_NUM_CHANNEL_TYPE]; + + AlfParam() + { + reset(); + } + + void reset() + { + std::memset( enabledFlag, false, sizeof( enabledFlag ) ); + std::memset( nonLinearFlag, false, sizeof( nonLinearFlag ) ); + std::memset( lumaCoeff, 0, sizeof( lumaCoeff ) ); + std::memset( lumaClipp, 0, sizeof( lumaClipp ) ); + numAlternativesChroma = 1; + std::memset( chromaCoeff, 0, sizeof( chromaCoeff ) ); + std::memset( chromaClipp, 0, sizeof( chromaClipp ) ); + std::memset( filterCoeffDeltaIdx, 0, sizeof( filterCoeffDeltaIdx ) ); + std::memset( alfLumaCoeffFlag, true, sizeof( alfLumaCoeffFlag ) ); + numLumaFilters = 1; + alfLumaCoeffDeltaFlag = false; + memset(newFilterFlag, 0, sizeof(newFilterFlag)); + } + + const AlfParam& operator = ( const AlfParam& src ) + { + std::memcpy( enabledFlag, src.enabledFlag, sizeof( enabledFlag ) ); + std::memcpy( nonLinearFlag, src.nonLinearFlag, sizeof( nonLinearFlag ) ); + std::memcpy( lumaCoeff, src.lumaCoeff, sizeof( lumaCoeff ) ); + std::memcpy( lumaClipp, src.lumaClipp, sizeof( lumaClipp ) ); + numAlternativesChroma = src.numAlternativesChroma; + std::memcpy( 
chromaCoeff, src.chromaCoeff, sizeof( chromaCoeff ) ); + std::memcpy( chromaClipp, src.chromaClipp, sizeof( chromaClipp ) ); + std::memcpy( filterCoeffDeltaIdx, src.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) ); + std::memcpy( alfLumaCoeffFlag, src.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) ); + numLumaFilters = src.numLumaFilters; + alfLumaCoeffDeltaFlag = src.alfLumaCoeffDeltaFlag; + filterShapes = src.filterShapes; + std::memcpy(newFilterFlag, src.newFilterFlag, sizeof(newFilterFlag)); + return *this; + } + + bool operator==( const AlfParam& other ) + { + if( memcmp( enabledFlag, other.enabledFlag, sizeof( enabledFlag ) ) ) + { + return false; + } + if( memcmp( nonLinearFlag, other.nonLinearFlag, sizeof( nonLinearFlag ) ) ) + { + return false; + } + if( memcmp( lumaCoeff, other.lumaCoeff, sizeof( lumaCoeff ) ) ) + { + return false; + } + if( memcmp( lumaClipp, other.lumaClipp, sizeof( lumaClipp ) ) ) + { + return false; + } + if( memcmp( chromaCoeff, other.chromaCoeff, sizeof( chromaCoeff ) ) ) + { + return false; + } + if( memcmp( chromaClipp, other.chromaClipp, sizeof( chromaClipp ) ) ) + { + return false; + } + if( memcmp( filterCoeffDeltaIdx, other.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) ) ) + { + return false; + } + if( memcmp( alfLumaCoeffFlag, other.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) ) ) + { + return false; + } + if( memcmp( newFilterFlag, other.newFilterFlag, sizeof( newFilterFlag ) ) ) + { + return false; + } + if( numAlternativesChroma != other.numAlternativesChroma ) + { + return false; + } + if( numLumaFilters != other.numLumaFilters ) + { + return false; + } + if( alfLumaCoeffDeltaFlag != other.alfLumaCoeffDeltaFlag ) + { + return false; + } + + return true; + } + + bool operator!=( const AlfParam& other ) + { + return !( *this == other ); + } +}; + +//! 
\} + +#endif // end of #ifndef __ALFPARAMETERS__ diff --git a/source/Lib/CommonLib/BitStream.cpp b/source/Lib/CommonLib/BitStream.cpp index 9d66589fc13fcc302c9833d5e76b01582724b854..58a3360b3e5f9b22af1651219ae8f8a83d7654e3 100644 --- a/source/Lib/CommonLib/BitStream.cpp +++ b/source/Lib/CommonLib/BitStream.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/BitStream.h b/source/Lib/CommonLib/BitStream.h index 64a9c8df612a13fd2cb806f30e5e1d8609e5c20b..bce5feadcb227b56dbcf2c757ddba14279f6ca91 100644 --- a/source/Lib/CommonLib/BitStream.h +++ b/source/Lib/CommonLib/BitStream.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index a773bd30664c756d70014d295fbfdc7e9a0dc726..e1f967f14fef717a85f2c5904e8cc6377feb69eb 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -42,10 +42,33 @@ #include "Buffer.h" #include "InterpolationFilter.h" -#if ENABLE_SIMD_OPT_BUFFER -#ifdef TARGET_SIMD_X86 +void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng) +{ + int idx = 0; + + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); + for (int h = 0; h < height; h++) + { + for (int w = 0; w < width; w++) + { + int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; + dI = Clip3(-dILimit, dILimit - 1, dI); + dst[w] = src[w] + dI; + if (!bi) + { + dst[w] = (dst[w] + offset) >> shiftNum; + dst[w] = ClipPel(dst[w], clpRng); + } + + idx++; + } + gradX += gradStride; + gradY += gradStride; + dst += dstStride; + src += srcStride; + } +} -#include "CommonDefX86.h" template< typename T > void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng ) @@ -71,19 +94,15 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str for (int x = 0; x < width; x += 4) { b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]); - b = ((b + 1) >> 1); dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]); - b = ((b + 1) >> 1); dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]); - b = ((b + 1) >> 1); dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng); b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]); - b = ((b + 
1) >> 1); dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng); } dst += dstStride; src0 += src0Stride; src1 += src1Stride; @@ -91,25 +110,28 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str } } +template<bool PAD = true> void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) { Pel* srcTmp = pSrc + srcStride + 1; Pel* gradXTmp = gradX + gradStride + 1; Pel* gradYTmp = gradY + gradStride + 1; - int shift1 = std::max<int>(2, (IF_INTERNAL_PREC - bitDepth)); + int shift1 = 6; for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++) { for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++) { - gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1; - gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1; + gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 ); + gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 ); } gradXTmp += gradStride; gradYTmp += gradStride; srcTmp += srcStride; } + if (PAD) + { gradXTmp = gradX + gradStride + 1; gradYTmp = gradY + gradStride + 1; for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++) @@ -129,39 +151,38 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr ::memcpy(gradXTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width)); ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); + } } -void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, 
const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) +void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) { - int shift4 = std::min<int>(8, (bitDepth - 4)); - int shift5 = std::min<int>(5, (bitDepth - 7)); - for (int y = 0; y < heightG; y++) + int shift4 = 4; + int shift5 = 1; + + for (int y = 0; y < 6; y++) { - for (int x = 0; x < widthG; x++) + for (int x = 0; x < 6; x++) { - int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4); - int tempX = (gradX0[x] + gradX1[x]) >> shift5; - int tempY = (gradY0[x] + gradY1[x]) >> shift5; - dotProductTemp1[x] = tempX * tempX; - dotProductTemp2[x] = tempX * tempY; - dotProductTemp3[x] = -tempX * temp; - dotProductTemp5[x] = tempY * tempY; - dotProductTemp6[x] = -tempY * temp; + int tmpGX = (gradX0[x] + gradX1[x]) >> shift5; + int tmpGY = (gradY0[x] + gradY1[x]) >> shift5; + int tmpDI = (int)((srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4)); + *sumAbsGX += (tmpGX < 0 ? -tmpGX : tmpGX); + *sumAbsGY += (tmpGY < 0 ? -tmpGY : tmpGY); + *sumDIX += (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI)); + *sumDIY += (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI)); + *sumSignGY_GX += (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 
0 : tmpGX)); + } - srcY0Temp += src0Stride; - srcY1Temp += src1Stride; - gradX0 += gradStride; - gradX1 += gradStride; - gradY0 += gradStride; - gradY1 += gradStride; - dotProductTemp1 += widthG; - dotProductTemp2 += widthG; - dotProductTemp3 += widthG; - dotProductTemp5 += widthG; - dotProductTemp6 += widthG; + srcY1Tmp += src1Stride; + srcY0Tmp += src0Stride; + gradX0 += widthG; + gradX1 += widthG; + gradY0 += widthG; + gradY1 += widthG; } } + void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize) { int *Gx2 = arraysGx2; @@ -195,12 +216,12 @@ void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy } } -#if ENABLE_SIMD_OPT_GBI -void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight) +#if ENABLE_SIMD_OPT_BCW +void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int bcwWeight) { - int normalizer = ((1 << 16) + (gbiWeight > 0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; - int weight0 = normalizer << g_GbiLog2WeightBase; - int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer; + int normalizer = ((1 << 16) + (bcwWeight > 0 ? 
(bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight; + int weight0 = normalizer << g_BcwLog2WeightBase; + int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer; #define REM_HF_INC \ src += srcStride; \ dst += dstStride; \ @@ -273,25 +294,24 @@ PelBufferOps::PelBufferOps() addBIOAvg4 = addBIOAvgCore; bioGradFilter = gradFilterCore; - calcBIOPar = calcBIOParCore; - calcBlkGradient = calcBlkGradientCore; + calcBIOSums = calcBIOSumsCore; copyBuffer = copyBufferCore; padding = paddingCore; -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW removeWeightHighFreq8 = removeWeightHighFreq; removeWeightHighFreq4 = removeWeightHighFreq; removeHighFreq8 = removeHighFreq; removeHighFreq4 = removeHighFreq; #endif + profGradFilter = gradFilterCore <false>; + applyPROF = applyPROFCore; + roundIntVector = nullptr; } PelBufferOps g_pelBufOP = PelBufferOps(); -#endif -#endif - void copyBufferCore(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height) { int numBytes = width * sizeof(Pel); @@ -327,11 +347,11 @@ void paddingCore(Pel *ptr, int stride, int width, int height, int padSize) } } template<> -void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx) +void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx) { - const int8_t w0 = getGbiWeight(gbiIdx, REF_PIC_LIST_0); - const int8_t w1 = getGbiWeight(gbiIdx, REF_PIC_LIST_1); - const int8_t log2WeightBase = g_GbiLog2WeightBase; + const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0); + const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1); + const int8_t log2WeightBase = g_BcwLog2WeightBase; const Pel* src0 = other1.buf; const Pel* src2 = other2.buf; @@ -407,6 +427,7 @@ void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& cl { for (unsigned x = 0; x < width; x++) { + src[x] = (Pel)Clip3((Pel)(-maxAbsclipBD - 1), 
(Pel)maxAbsclipBD, src[x]); sign = src[x] >= 0 ? 1 : -1; absval = sign * src[x]; int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC); @@ -757,3 +778,74 @@ const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const return ( chromaFormat == CHROMA_400 ) ? CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) ); } +template<> +void UnitBuf<Pel>::colorSpaceConvert(const UnitBuf<Pel> &other, const bool forward) +{ + const Pel* pOrg0 = bufs[COMPONENT_Y].buf; + const Pel* pOrg1 = bufs[COMPONENT_Cb].buf; + const Pel* pOrg2 = bufs[COMPONENT_Cr].buf; + const int strideOrg = bufs[COMPONENT_Y].stride; + + Pel* pDst0 = other.bufs[COMPONENT_Y].buf; + Pel* pDst1 = other.bufs[COMPONENT_Cb].buf; + Pel* pDst2 = other.bufs[COMPONENT_Cr].buf; + const int strideDst = other.bufs[COMPONENT_Y].stride; + + int width = bufs[COMPONENT_Y].width; + int height = bufs[COMPONENT_Y].height; + int r, g, b; + int y0, cg, co; + + CHECK(bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content"); + CHECK(other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content"); + CHECK(bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size") + + if (forward) + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + r = pOrg2[x]; + g = pOrg0[x]; + b = pOrg1[x]; + + pDst0[x] = (g << 1) + r + b; + pDst1[x] = (g << 1) - r - b; + pDst2[x] = ((r - b) << 1); + pDst0[x] = (pDst0[x] + 2) >> 2; + pDst1[x] = (pDst1[x] + 2) >> 2; + pDst2[x] = (pDst2[x] + 2) >> 2; + } + pOrg0 += strideOrg; + pOrg1 += strideOrg; + pOrg2 += strideOrg; + pDst0 += strideDst; + pDst1 += strideDst; + pDst2 += strideDst; + } + } 
+ else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + y0 = pOrg0[x]; + cg = pOrg1[x]; + co = pOrg2[x]; + + pDst0[x] = (y0 + cg); + pDst1[x] = (y0 - cg - co); + pDst2[x] = (y0 - cg + co); + } + + pOrg0 += strideOrg; + pOrg1 += strideOrg; + pOrg2 += strideOrg; + pDst0 += strideDst; + pDst1 += strideDst; + pDst2 += strideDst; + } + } +} diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 4d34cc3d5e873e3854981dec450c733481c40a59..9b461389d48aa12dc181aa7d4dac732a7caca5c2 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,16 +51,15 @@ // AreaBuf struct // --------------------------------------------------------------------------- -#if ENABLE_SIMD_OPT_BUFFER -#ifdef TARGET_SIMD_X86 - struct PelBufferOps { PelBufferOps(); +#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86) void initPelBufOpsX86(); template<X86_VEXT vext> void _initPelBufOpsX86(); +#endif void ( *addAvg4 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng ); void ( *addAvg8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng ); @@ -71,23 +70,23 @@ struct PelBufferOps void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng); void(*bioGradFilter) (Pel* pSrc, int 
srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth); void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth); + void(*calcBIOSums) (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX); void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height); void(*padding)(Pel *dst, int stride, int width, int height, int padSize); -#if ENABLE_SIMD_OPT_GBI - void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); - void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); +#if ENABLE_SIMD_OPT_BCW + void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int bcwWeight); + void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int bcwWeight); void ( *removeHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height); void ( *removeHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height); 
#endif + void (*profGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth); + void (*applyPROF) (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng); + void (*roundIntVector) (int* v, int size, unsigned int nShift, const int dmvLimit); }; extern PelBufferOps g_pelBufOP; -#endif -#endif - - void paddingCore(Pel *ptr, int stride, int width, int height, int padSize); void copyBufferCore(Pel *src, int srcStride, Pel *Dst, int dstStride, int width, int height); @@ -118,8 +117,10 @@ struct AreaBuf : public Size void subtract ( const AreaBuf<const T> &other ); void extendSingleBorderPel(); void extendBorderPel ( unsigned margin ); - void addWeightedAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t gbiIdx); - void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iGbiWeight); + void extendBorderPel(unsigned marginX, unsigned marginY); + void padBorderPel ( unsigned marginX, unsigned marginY, int dir ); + void addWeightedAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t bcwIdx); + void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iBcwWeight); void addAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng ); void removeHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng); void updateHistogram ( std::vector<int32_t>& hist ) const; @@ -165,6 +166,11 @@ typedef AreaBuf<const TCoeff> CCoeffBuf; typedef AreaBuf< MotionInfo> MotionBuf; typedef AreaBuf<const MotionInfo> CMotionBuf; +typedef AreaBuf< TCoeff> PLTescapeBuf; +typedef AreaBuf<const TCoeff> 
CPLTescapeBuf; + +typedef AreaBuf< bool> PLTtypeBuf; +typedef AreaBuf<const bool> CPLTtypeBuf; #define SIZE_AWARE_PER_EL_OP( OP, INC ) \ if( ( width & 7 ) == 0 ) \ @@ -360,6 +366,7 @@ void AreaBuf<T>::subtract( const AreaBuf<const T> &other ) #undef SUBS_INC } + template<typename T> void AreaBuf<T>::copyClip( const AreaBuf<const T> &src, const ClpRng& clpRng ) { @@ -407,10 +414,10 @@ template<> void AreaBuf<Pel>::toLast( const ClpRng& clpRng ); template<typename T> -void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t gbiWeight) +void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t bcwWeight) { - const int8_t gbiWeightOther = g_GbiWeightBase - gbiWeight; - const int8_t log2WeightBase = g_GbiLog2WeightBase; + const int8_t bcwWeightOther = g_BcwWeightBase - bcwWeight; + const int8_t log2WeightBase = g_BcwLog2WeightBase; const Pel* src = other.buf; const int srcStride = other.stride; @@ -418,22 +425,22 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, Pel* dst = buf; const int dstStride = stride; -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW if(!bClip) { if(!(width & 7)) - g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, gbiWeight); + g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, bcwWeight); else if(!(width & 3)) - g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, gbiWeight); + g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, bcwWeight); else CHECK(true, "Not supported"); } else { #endif - int normalizer = ((1 << 16) + (gbiWeight > 0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; + int normalizer = ((1 << 16) + (bcwWeight > 0 ? 
(bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight; int weight0 = normalizer << log2WeightBase; - int weight1 = gbiWeightOther * normalizer; + int weight1 = bcwWeightOther * normalizer; #define REM_HF_INC \ src += srcStride; \ dst += dstStride; \ @@ -453,7 +460,7 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, #undef REM_HF_INC #undef REM_HF_OP #undef REM_HF_OP_CLIP -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW } #endif } @@ -467,7 +474,7 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons T* dst = buf; const int dstStride = stride; -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW if (!bClip) { if(!(width & 7)) @@ -501,7 +508,7 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons #undef REM_HF_OP #undef REM_HF_OP_CLIP -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW } #endif } @@ -520,6 +527,82 @@ void AreaBuf<T>::updateHistogram( std::vector<int32_t>& hist ) const } } +template<typename T> +void AreaBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY) +{ + T* p = buf; + int h = height; + int w = width; + int s = stride; + + CHECK((w + 2 * marginX) > s, "Size of buffer too small to extend"); + // do left and right margins + for (int y = 0; y < h; y++) + { + for (int x = 0; x < marginX; x++) + { + *(p - marginX + x) = p[0]; + p[w + x] = p[w - 1]; + } + p += s; + } + + // p is now the (0,height) (bottom left of image within bigger picture + p -= (s + marginX); + // p is now the (-margin, height-1) + for (int y = 0; y < marginY; y++) + { + ::memcpy(p + (y + 1) * s, p, sizeof(T) * (w + (marginX << 1))); + } + + // p is still (-marginX, height-1) + p -= ((h - 1) * s); + // p is now (-marginX, 0) + for (int y = 0; y < marginY; y++) + { + ::memcpy(p - (y + 1) * s, p, sizeof(T) * (w + (marginX << 1))); + } +} + +template<typename T> +void AreaBuf<T>::padBorderPel( unsigned marginX, unsigned marginY, int dir ) +{ + T* p = buf; + int s = stride; + int h = height; + 
int w = width; + + CHECK( w > s, "Size of buffer too small to extend" ); + + // top-left margin + if ( dir == 1 ) + { + for( int y = 0; y < marginY; y++ ) + { + for( int x = 0; x < marginX; x++ ) + { + p[x] = p[marginX]; + } + p += s; + } + } + + // bottom-right margin + if ( dir == 2 ) + { + p = buf + s * ( h - marginY ) + w - marginX; + + for( int y = 0; y < marginY; y++ ) + { + for( int x = 0; x < marginX; x++ ) + { + p[x] = p[-1]; + } + p += s; + } + } +} + template<typename T> void AreaBuf<T>::extendBorderPel( unsigned margin ) { @@ -680,20 +763,23 @@ struct UnitBuf const AreaBuf<T>& Cr() const { return bufs[2]; } void fill ( const T &val ); - void copyFrom ( const UnitBuf<const T> &other ); + void copyFrom ( const UnitBuf<const T> &other, const bool lumaOnly = false, const bool chromaOnly = false ); void reconstruct ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs ); - void copyClip ( const UnitBuf<const T> &src, const ClpRngs& clpRngs ); + void copyClip ( const UnitBuf<const T> &src, const ClpRngs& clpRngs, const bool lumaOnly = false, const bool chromaOnly = false ); void subtract ( const UnitBuf<const T> &other ); - void addWeightedAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false); + void addWeightedAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx = BCW_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false); void addAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false); void extendSingleBorderPel(); + void extendBorderPel(unsigned marginX, unsigned marginY); + void padBorderPel ( unsigned margin, int dir ); void extendBorderPel ( unsigned margin ); void removeHighFreq ( const UnitBuf<T>& other, const bool 
bClip, const ClpRngs& clpRngs - , const int8_t gbiWeight = g_GbiWeights[GBI_DEFAULT] + , const int8_t bcwWeight = g_BcwWeights[BCW_DEFAULT] ); UnitBuf< T> subBuf (const UnitArea& subArea); const UnitBuf<const T> subBuf (const UnitArea& subArea) const; + void colorSpaceConvert(const UnitBuf<T> &other, const bool forward); }; typedef UnitBuf< Pel> PelUnitBuf; @@ -712,11 +798,14 @@ void UnitBuf<T>::fill( const T &val ) } template<typename T> -void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other ) +void UnitBuf<T>::copyFrom(const UnitBuf<const T> &other, const bool lumaOnly, const bool chromaOnly ) { CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" ); - for( unsigned i = 0; i < bufs.size(); i++ ) + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + const size_t compStart = chromaOnly ? 1 : 0; + const size_t compEnd = lumaOnly ? 1 : (unsigned) bufs.size(); + for( size_t i = compStart; i < compEnd; i++ ) { bufs[i].copyFrom( other.bufs[i] ); } @@ -736,11 +825,14 @@ void UnitBuf<T>::subtract( const UnitBuf<const T> &other ) } template<typename T> -void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs& clpRngs) +void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs &clpRngs, const bool lumaOnly, const bool chromaOnly ) { CHECK( chromaFormat != src.chromaFormat, "Incompatible formats" ); - for( unsigned i = 0; i < bufs.size(); i++ ) + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + const size_t compStart = chromaOnly ? 1 : 0; + const size_t compEnd = lumaOnly ? 
1 : bufs.size(); + for( size_t i = compStart; i < compEnd; i++ ) { bufs[i].copyClip( src.bufs[i], clpRngs.comp[i] ); } @@ -760,7 +852,7 @@ void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T } template<typename T> -void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx /* = GBI_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */) +void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx /* = BCW_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */) { const size_t istart = chromaOnly ? 1 : 0; const size_t iend = lumaOnly ? 1 : bufs.size(); @@ -769,7 +861,7 @@ void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<co for(size_t i = istart; i < iend; i++) { - bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs.comp[i], gbiIdx); + bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs.comp[i], bcwIdx); } } @@ -787,6 +879,15 @@ void UnitBuf<T>::addAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> & } } +template<typename T> +void UnitBuf<T>::colorSpaceConvert(const UnitBuf<T> &other, const bool forward) +{ + THROW("Type not supported"); +} + +template<> +void UnitBuf<Pel>::colorSpaceConvert(const UnitBuf<Pel> &other, const bool forward); + template<typename T> void UnitBuf<T>::extendSingleBorderPel() { @@ -796,6 +897,24 @@ void UnitBuf<T>::extendSingleBorderPel() } } +template<typename T> +void UnitBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY) +{ + for (unsigned i = 0; i < bufs.size(); i++) + { + bufs[i].extendBorderPel(marginX >> getComponentScaleX(ComponentID(i), chromaFormat), marginY >> getComponentScaleY(ComponentID(i), chromaFormat)); + } +} + +template<typename T> +void UnitBuf<T>::padBorderPel( unsigned margin, int dir ) +{ + for( unsigned i = 
0; i < bufs.size(); i++ ) + { + bufs[i].padBorderPel( margin >> getComponentScaleX( ComponentID( i ), chromaFormat ), margin >> getComponentScaleY( ComponentID( i ), chromaFormat ), dir ); + } +} + template<typename T> void UnitBuf<T>::extendBorderPel( unsigned margin ) { @@ -807,12 +926,12 @@ void UnitBuf<T>::extendBorderPel( unsigned margin ) template<typename T> void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs - , const int8_t gbiWeight + , const int8_t bcwWeight ) { - if(gbiWeight != g_GbiWeights[GBI_DEFAULT]) + if(bcwWeight != g_BcwWeights[BCW_DEFAULT]) { - bufs[0].removeWeightHighFreq(other.bufs[0], bClip, clpRngs.comp[0], gbiWeight); + bufs[0].removeWeightHighFreq(other.bufs[0], bClip, clpRngs.comp[0], bcwWeight); return; } bufs[0].removeHighFreq(other.bufs[0], bClip, clpRngs.comp[0]); @@ -885,5 +1004,25 @@ private: Pel *m_origin[MAX_NUM_COMPONENT]; }; +struct CompStorage : public PelBuf +{ + CompStorage () { m_memory = nullptr; } + ~CompStorage() { if (valid()) delete [] m_memory; } + + void create( const Size& size ) + { + CHECK( m_memory, "Trying to re-create an already initialized buffer" ); + m_memory = new Pel [ size.area() ]; + *static_cast<PelBuf*>(this) = PelBuf( m_memory, size ); + } + void destroy() + { + if (valid()) delete [] m_memory; + m_memory = nullptr; + } + bool valid() { return m_memory != nullptr; } +private: + Pel* m_memory; +}; #endif diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt index 06cb4088c813a94e400fb9379486eb14fc5050f1..b12307342f0d454321099a2e3c532f01921a3ff1 100644 --- a/source/Lib/CommonLib/CMakeLists.txt +++ b/source/Lib/CommonLib/CMakeLists.txt @@ -51,6 +51,10 @@ if( EXTENSION_360_VIDEO ) target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 ) endif() +if( EXTENSION_HDRTOOLS ) + target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 ) +endif() + if( SET_ENABLE_TRACING ) if( ENABLE_TRACING ) 
target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 ) diff --git a/source/Lib/CommonLib/CacheModel.h b/source/Lib/CommonLib/CacheModel.h index 094390a6379e761613b236b089e6072a80b84c2a..1150c24e0262c4a7be046b6f648de21c92d3867b 100644 --- a/source/Lib/CommonLib/CacheModel.h +++ b/source/Lib/CommonLib/CacheModel.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/ChromaFormat.cpp b/source/Lib/CommonLib/ChromaFormat.cpp index 4bccf4336aeb17fb1f5e34c0da2191ad762e4dbc..9a56d89e33ad88e808a2ab60ba34b7845e2bad69 100644 --- a/source/Lib/CommonLib/ChromaFormat.cpp +++ b/source/Lib/CommonLib/ChromaFormat.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/ChromaFormat.h b/source/Lib/CommonLib/ChromaFormat.h index 2922914b4d3ab54838f1b976b3400c90b0992c86..14bc517bd07346aef7bbae95b5fb89884c9d9c8a 100644 --- a/source/Lib/CommonLib/ChromaFormat.h +++ b/source/Lib/CommonLib/ChromaFormat.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -106,37 +106,23 @@ static inline uint64_t getTotalFracBits(const uint32_t width, const uint32_t hei //Intra prediction ==================================================================================================== //====================================================================================================================== -static inline bool filterIntraReferenceSamples (const ChannelType chType, const ChromaFormat chFmt, const bool intraReferenceSmoothingDisabled) -{ - return (!intraReferenceSmoothingDisabled) && (isLuma(chType) || (chFmt == CHROMA_444)); -} - - //------------------------------------------------ static inline int getTransformShift(const int channelBitDepth, const Size size, const int maxLog2TrDynamicRange) { - return maxLog2TrDynamicRange - channelBitDepth - ( ( g_aucLog2[size.width] + g_aucLog2[size.height] ) >> 1 ); + return maxLog2TrDynamicRange - channelBitDepth - ( ( floorLog2(size.width) + floorLog2(size.height) ) >> 1 ); } //------------------------------------------------ -static inline int getScaledChromaQP(int unscaledChromaQP, const ChromaFormat chFmt) -{ - return g_aucChromaScale[chFmt][Clip3(0, (chromaQPMappingTableSize - 1), unscaledChromaQP)]; -} - - -#if HEVC_USE_SCALING_LISTS //====================================================================================================================== //Scaling lists ======================================================================================================= //====================================================================================================================== static inline int getScalingListType(const PredMode predMode, const ComponentID compID) { - return ((predMode != MODE_INTER) ? 0 : MAX_NUM_COMPONENT) + MAP_CHROMA(compID); + return ((predMode == MODE_INTRA) ? 
0 : MAX_NUM_COMPONENT) + MAP_CHROMA(compID); } -#endif #endif diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 1a47050e37f336b7209d64809ef3940f6111d3c0..375ed6ec212e215c9472c0daca7185bde3b8a6f7 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,6 +58,7 @@ enum CodingStatisticsType STATS__CABAC_BITS__MERGE_INDEX, STATS__CABAC_BITS__MVP_IDX, STATS__CABAC_BITS__SPLIT_FLAG, + STATS__CABAC_BITS__MODE_CONSTRAINT_FLAG, STATS__CABAC_BITS__PART_SIZE, STATS__CABAC_BITS__PRED_MODE, STATS__CABAC_BITS__INTRA_DIR_ANG, @@ -76,6 +77,7 @@ enum CodingStatisticsType STATS__CABAC_BITS__CHROMA_QP_ADJUSTMENT, STATS__CABAC_BITS__QT_CBF, STATS__CABAC_BITS__CROSS_COMPONENT_PREDICTION, + STATS__CABAC_BITS__JOINT_CB_CR, STATS__CABAC_BITS__MTS_FLAGS, STATS__CABAC_BITS__LAST_SIG_X_Y, STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG, @@ -85,12 +87,19 @@ enum CodingStatisticsType STATS__CABAC_BITS__GT2_FLAG, STATS__CABAC_BITS__SIGN_BIT, STATS__CABAC_BITS__ESCAPE_BITS, +#if TR_ONLY_COEFF_STATS + STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG_TS, + STATS__CABAC_BITS__PAR_FLAG_TS, + STATS__CABAC_BITS__GT1_FLAG_TS, + STATS__CABAC_BITS__GT2_FLAG_TS, + STATS__CABAC_BITS__SIGN_BIT_TS, + STATS__CABAC_BITS__ESCAPE_BITS_TS, +#endif STATS__CABAC_BITS__SAO, + STATS__CABAC_BITS__LFNST, STATS__CABAC_BITS__ALF, STATS__CABAC_TRM_BITS, STATS__CABAC_FIXED_BITS, - STATS__CABAC_PCM_ALIGN_BITS, - STATS__CABAC_PCM_CODE_BITS, STATS__BYTE_ALIGNMENT_BITS, STATS__TRAILING_BITS, STATS__EXPLICIT_RDPCM_BITS, @@ -100,16 +109,18 @@ enum CodingStatisticsType STATS__CABAC_BITS__OTHER, STATS__CABAC_BITS__INVALID, STATS__CABAC_BITS__IMV_FLAG, - 
STATS__CABAC_BITS__GBI_IDX, + STATS__CABAC_BITS__BCW_IDX, STATS__CABAC_BITS__SBT_MODE, STATS__CABAC_BITS__MH_INTRA_FLAG, STATS__CABAC_BITS__TRIANGLE_FLAG, STATS__CABAC_BITS__TRIANGLE_INDEX, STATS__CABAC_BITS__MULTI_REF_LINE, STATS__CABAC_BITS__SYMMVD_FLAG, + STATS__CABAC_BITS__BDPCM_MODE, STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report. STATS__TOOL_AFF, STATS__TOOL_EMT, + STATS__TOOL_LFNST, STATS__TOOL_TOTAL, STATS__NUM_STATS }; @@ -139,6 +150,7 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__MERGE_INDEX", "CABAC_BITS__MVP_IDX", "CABAC_BITS__SPLIT_FLAG", + "CABAC_BITS__MODE_CONSTRAINT_FLAG", "CABAC_BITS__PART_SIZE", "CABAC_BITS__PRED_MODE", "CABAC_BITS__INTRA_DIR_ANG", @@ -157,6 +169,7 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__CHROMA_QP_ADJUSTMENT", "CABAC_BITS__QT_CBF", "CABAC_BITS__CROSS_COMPONENT_PREDICTION", + "CABAC_BITS__JOINT_CB_CR", "CABAC_BITS__MTS_FLAGS", "CABAC_BITS__LAST_SIG_X_Y", "CABAC_BITS__SIG_COEFF_GROUP_FLAG", @@ -166,12 +179,19 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__GT2_FLAG", "CABAC_BITS__SIGN_BIT", "CABAC_BITS__ESCAPE_BITS", +#if TR_ONLY_COEFF_STATS + "CABAC_BITS__SIG_COEFF_MAP_FLAG_TS", + "CABAC_BITS__PAR_FLAG_TS", + "CABAC_BITS__GT1_FLAG_TS", + "CABAC_BITS__GT2_FLAG_TS", + "CABAC_BITS__SIGN_BIT_TS", + "CABAC_BITS__ESCAPE_BITS_TS", +#endif "CABAC_BITS__SAO", + "CABAC_BITS__LFNST", "CABAC_BITS__ALF", "CABAC_TRM_BITS", "CABAC_FIXED_BITS", - "CABAC_PCM_ALIGN_BITS", - "CABAC_PCM_CODE_BITS", "BYTE_ALIGNMENT_BITS", "TRAILING_BITS", "EXPLICIT_RDPCM_BITS", @@ -181,16 +201,18 @@ static inline const char* getName(CodingStatisticsType name) "CABAC_BITS__OTHER", "CABAC_BITS__INVALID", "CABAC_BITS__IMV_FLAG", - "CABAC_BITS__GBI_IDX", + "CABAC_BITS__BCW_IDX", "CABAC_BITS__SBT_MODE", "CABAC_BITS__MH_INTRA_FLAG", "CABAC_BITS__TRIANGLE_FLAG", "CABAC_BITS__TRIANGLE_INDEX", "CABAC_BITS__MULTI_REF_LINE", 
"CABAC_BITS__SYMMVD_FLAG", + "CABAC_BITS__BDPCM_MODE", "TOOL_FRAME", "TOOL_AFFINE", "TOOL_EMT", + "TOOL_LFNST", "TOOL_TOTAL" }; CHECK( STATS__NUM_STATS != sizeof( statNames ) / sizeof( char* ) || name >= STATS__NUM_STATS, "stats out of range" ); @@ -301,6 +323,13 @@ public: { bits += src.bits; count += src.count; sum += src.sum; classCount += src.classCount; return *this; } + +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + SStat &operator-=(const SStat &src) + { + bits -= src.bits; count -= src.count; sum -= src.sum; classCount -= src.classCount; return *this; + } +#endif }; struct StatTool @@ -319,12 +348,45 @@ public: } }; +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + struct SStat_max + { + SStat max_CABAC_state; + SStat max_EP_state; + SStat trf_CABAC_state; + SStat trf_EP_state; + SStat acc_trf_CABAC_state; + SStat acc_trf_EP_state; + SStat prev_CABAC_state; + SStat prev_EP_state; + SStat prev_trf_CABAC_state; + SStat prev_trf_EP_state; + + void clear() + { + max_CABAC_state.clear(); + max_EP_state.clear(); + trf_CABAC_state.clear(); + trf_EP_state.clear(); + acc_trf_CABAC_state.clear(); + acc_trf_EP_state.clear(); + prev_CABAC_state.clear(); + prev_EP_state.clear(); + prev_trf_CABAC_state.clear(); + prev_trf_EP_state.clear(); + } + }; +#endif + class CodingStatisticsData { private: SStat statistics [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES]; SStat statistics_ep [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES]; StatTool statistics_tool [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES]; +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + SStat_max statistics_max; +#endif std::map<std::string, SStat> mappings_ep; friend class CodingStatistics; }; @@ -421,6 +483,10 @@ private: int64_t classCounts[STATS__NUM_STATS]; std::fill_n( classCounts, ( size_t ) STATS__NUM_STATS, 0 ); +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + SStat_max &max = GetStatisticMax(); +#endif + int64_t cr = 0; // CABAC remainder, which is added to "STATS__CABAC_INITIALISATION" { 
int64_t totalCABACbits = 0, roundedCABACbits = 0; @@ -486,6 +552,9 @@ private: if( i == STATS__CABAC_INITIALISATION && sCABACorig.bits != 0 ) { thisCABACbits += cr; +#if EPBINCOUNT_FIX + sCABACorig.count = 0; +#endif cr = 0; } sCABAC.bits = thisCABACbits; @@ -493,6 +562,12 @@ private: sCABAC.sum = sCABACorig.sum; sCABAC.classCount = classCounts[i]; } +#if EPBINCOUNT_FIX + if (i == STATS__BYTE_ALIGNMENT_BITS || i == STATS__TRAILING_BITS || i == STATS__NAL_UNIT_HEADER_BITS || i == STATS__EMULATION_PREVENTION_3_BYTES) + { + sEP.count = 0; + } +#endif uint32_t wIdx = CodingStatisticsClassType::GetSubClassWidth( c ); uint32_t hIdx = CodingStatisticsClassType::GetSubClassHeight( c ); OutputLine( pName, ':', wIdx, hIdx, CodingStatisticsClassType::GetSubClassString( c ), sCABAC, sEP ); @@ -514,6 +589,18 @@ private: { cabacSubTotal.classCount = classCounts[i]; OutputLine( pName, '~', "~~ST~~", "~~ST~~", "~~ST~~", cabacSubTotal, epSubTotal ); + +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + // For TRF + if ((i == STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG) || (i == STATS__CABAC_BITS__PAR_FLAG) + || (i == STATS__CABAC_BITS__GT1_FLAG) || (i == STATS__CABAC_BITS__GT2_FLAG) + || (i == STATS__CABAC_BITS__ESCAPE_BITS)) + { + max.acc_trf_CABAC_state += cabacSubTotal; + max.acc_trf_EP_state += epSubTotal; + } +#endif + } if( i == STATS__NAL_UNIT_TOTAL_BODY ) { @@ -596,6 +683,12 @@ private: OutputDashedLine( "GRAND TOTAL" ); epTotalBits += cavlcTotalBits; OutputLine ( "TOTAL", '~', "~~GT~~", "~~GT~~", "~~GT~~", cabacTotalBits, epTotalBits ); +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + OutputDashedLine(""); + OutputLine("CABAC MAX FRAME stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.max_CABAC_state, max.max_EP_state); + OutputLine("CABAC MAX FRAME TRF stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.trf_CABAC_state, max.trf_EP_state); + OutputLine("CABAC Accumulated TRF stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.acc_trf_CABAC_state, max.acc_trf_EP_state); +#endif } void 
OutputToolStats() @@ -704,6 +797,10 @@ public: static StatTool &GetStatisticTool ( const CodingStatisticsClassType &stat ) { return GetSingletonInstance().data.statistics_tool[stat.type][stat.subClass]; } +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + static SStat_max &GetStatisticMax() { return GetSingletonInstance().data.statistics_max; } +#endif + static int getNumOnes( int bins ) { CHECK( bins < 0, "Bins should not be nagative" ); @@ -722,7 +819,11 @@ public: CHECK( stat.type == STATS__CABAC_BITS__INVALID, "Should never be used." ); SStat &s = GetStatisticEP( stat ); s.bits += numBits; +#if EPBINCOUNT_FIX + s.count += numBits; +#else s.count++; +#endif s.sum += getNumOnes( value ); } @@ -730,7 +831,11 @@ public: { SStat &s = GetStatisticEP( str ); s.bits += numBits; +#if EPBINCOUNT_FIX + s.count += numBits; +#else s.count++; +#endif s.sum += getNumOnes( value ); } @@ -738,7 +843,11 @@ public: { SStat &s = GetStatisticEP( pKey ); s.bits += numBits; +#if EPBINCOUNT_FIX + s.count += numBits; +#else s.count++; +#endif s.sum += getNumOnes( value ); } @@ -768,6 +877,132 @@ public: s.count++; s.sum += val; } + +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS + static void UpdateMaxStat(CodingStatisticsData *data) + { + SStat_max & max = GetStatisticMax(); + const int64_t es = CODINGSTATISTICS_ENTROPYSCALE; + + int64_t countTotal = 0; + int64_t classCounts[STATS__NUM_STATS]; + std::fill_n(classCounts, (size_t) STATS__NUM_STATS, 0); + + int64_t cr = 0; // CABAC remainder, which is added to "STATS__CABAC_INITIALISATION" + + int64_t totalCABACbits = 0, roundedCABACbits = 0; + for (int i = STATS__NAL_UNIT_PACKING; i < STATS__NUM_STATS; i++) + { + int64_t classCount = 0; + + for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++) + { + totalCABACbits += data->statistics[i][c].bits; + roundedCABACbits += data->statistics[i][c].bits / es; + classCount += data->statistics[i][c].count; + } + + for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++) + { + 
data->statistics[i][c].classCount = classCount; + } + + classCounts[i] = classCount; + countTotal += classCount; + } + int64_t remainder = totalCABACbits - roundedCABACbits * es; + cr = (remainder + es / 2) / es; + + classCounts[0] = countTotal; + + SStat cabacTotalBits, epTotalBits, cabacTrfTotalBits, epTrfTotalBits; + + cabacTotalBits.classCount = countTotal; + epTotalBits.classCount = countTotal; + cabacTrfTotalBits.classCount = countTotal; + epTrfTotalBits.classCount = countTotal; + + // Calculate the actual bin and bit count + for (int i = 0; i < STATS__NUM_STATS; i++) + { + for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++) + { + SStat &sCABACorig = data->statistics[i][c]; + SStat &sEP = data->statistics_ep[i][c]; + + if (sCABACorig.bits == 0 && sEP.bits == 0) + { + continue; + } + + SStat sCABAC; + { + int64_t thisCABACbits = sCABACorig.bits / es; + if (i == STATS__CABAC_INITIALISATION && sCABACorig.bits != 0) + { + thisCABACbits += cr; +#if EPBINCOUNT_FIX + sCABACorig.count = 0; +#endif + cr = 0; + } + sCABAC.bits = thisCABACbits; + sCABAC.count = sCABACorig.count; + sCABAC.sum = sCABACorig.sum; + sCABAC.classCount = classCounts[i]; + } +#if EPBINCOUNT_FIX + if ( i == STATS__BYTE_ALIGNMENT_BITS || i == STATS__TRAILING_BITS || i == STATS__NAL_UNIT_HEADER_BITS || i == STATS__EMULATION_PREVENTION_3_BYTES ) + { + sEP.count = 0; + } +#endif + + if( i != STATS__NAL_UNIT_TOTAL_BODY ) + { + cabacTotalBits += sCABAC; + epTotalBits += sEP; + + // For TRF + if ((i == STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG) || (i == STATS__CABAC_BITS__PAR_FLAG) + || (i == STATS__CABAC_BITS__GT1_FLAG) || (i == STATS__CABAC_BITS__GT2_FLAG) + || (i == STATS__CABAC_BITS__ESCAPE_BITS)) + { + cabacTrfTotalBits += sCABAC; + epTrfTotalBits += sEP; + } + } + } + } + + SStat delta_CABAC = cabacTotalBits; + SStat delta_EP = epTotalBits; + SStat delta_trf_CABAC = cabacTrfTotalBits; + SStat delta_trf_EP = epTrfTotalBits; + + delta_CABAC -= max.prev_CABAC_state; + delta_EP -= 
max.prev_EP_state; + + delta_trf_CABAC -= max.prev_trf_CABAC_state; + delta_trf_EP -= max.prev_trf_EP_state; + int64_t max_frame_bins = EPBIN_WEIGHT_FACTOR * max.max_CABAC_state.count + max.max_EP_state.count; + int64_t cur_frame_bins = EPBIN_WEIGHT_FACTOR * delta_CABAC.count + delta_EP.count; + + if (cur_frame_bins > max_frame_bins) + { + max.max_CABAC_state = delta_CABAC; + max.max_EP_state = delta_EP; + max.trf_CABAC_state = delta_trf_CABAC; + max.trf_EP_state = delta_trf_EP; + } + + max.prev_CABAC_state = cabacTotalBits; + max.prev_EP_state = epTotalBits; + + max.prev_trf_CABAC_state = cabacTrfTotalBits; + max.prev_trf_EP_state = epTrfTotalBits; + } +#endif }; #endif diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index d45ca4e86b9575af64fe7806abd2d7738d985259..336daef4fdb05d365d34eff86c583b47ac09e3c8 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -67,15 +67,23 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu , m_cuCache ( cuCache ) , m_puCache ( puCache ) , m_tuCache ( tuCache ) + , bestParent ( nullptr ) + , tmpColorSpaceCost(MAX_DOUBLE) + , firstColorSpaceSelected(true) + , resetIBCBuffer (false) { for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ ) { m_coeffs[ i ] = nullptr; m_pcmbuf[ i ] = nullptr; - m_offsets[ i ] = 0; } + for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) + { + m_runType[i] = nullptr; + } + for( uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ ) { m_cuIdx [ i ] = nullptr; @@ -86,7 +94,11 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu m_motionBuf = nullptr; features.resize( NUM_ENC_FEATURES ); - + treeType = TREE_D; + modeType = MODE_TYPE_ALL; + tmpColorSpaceIntraCost[0] = MAX_DOUBLE; + tmpColorSpaceIntraCost[1] = MAX_DOUBLE; + firstColorSpaceTestOnly = false; } void CodingStructure::destroy() @@ -183,14 +195,96 @@ void CodingStructure::setDecomp(const UnitArea &_area, const bool _isCoded /*= t } } +const int CodingStructure::signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const +{ + if (CS::isDualITree(*this) || modeTypeParent != MODE_TYPE_ALL || partitioner.currArea().chromaFormat == CHROMA_444 || partitioner.currArea().chromaFormat == CHROMA_400 ) + return LDT_MODE_TYPE_INHERIT; + int minLumaArea = partitioner.currArea().lumaSize().area(); + if (split == CU_QUAD_SPLIT || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT) // the area is split into 3 or 4 parts + { + minLumaArea = minLumaArea >> 2; + } + else if (split == CU_VERT_SPLIT || split == CU_HORZ_SPLIT) // the area is split into 2 parts + { + minLumaArea = minLumaArea >> 1; + } + int minChromaBlock = minLumaArea >> (getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, partitioner.currArea().chromaFormat) + getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, 
partitioner.currArea().chromaFormat)); + bool is2xNChroma = (partitioner.currArea().chromaSize().width == 4 && split == CU_VERT_SPLIT) || (partitioner.currArea().chromaSize().width == 8 && split == CU_TRIV_SPLIT); + return minChromaBlock >= 16 && !is2xNChroma ? LDT_MODE_TYPE_INHERIT : ((minLumaArea < 32) || slice->isIntra()) ? LDT_MODE_TYPE_INFER : LDT_MODE_TYPE_SIGNAL; +} + +void CodingStructure::clearCuPuTuIdxMap( const UnitArea &_area, uint32_t numCu, uint32_t numPu, uint32_t numTu, uint32_t* pOffset ) +{ + UnitArea clippedArea = clipArea( _area, *picture ); + uint32_t numCh = ::getNumberValidChannels( _area.chromaFormat ); + for( uint32_t i = 0; i < numCh; i++ ) + { + const CompArea &_selfBlk = area.blocks[i]; + const CompArea &_blk = clippedArea.blocks[i]; + + const UnitScale& scale = unitScale[_blk.compID]; + const Area scaledSelf = scale.scale( _selfBlk ); + const Area scaledBlk = scale.scale( _blk ); + const size_t offset = rsAddr( scaledBlk.pos(), scaledSelf.pos(), scaledSelf.width ); + unsigned *idxPtrCU = m_cuIdx[i] + offset; + AreaBuf<uint32_t>( idxPtrCU, scaledSelf.width, scaledBlk.size() ).fill( 0 ); + + unsigned *idxPtrPU = m_puIdx[i] + offset; + AreaBuf<uint32_t>( idxPtrPU, scaledSelf.width, scaledBlk.size() ).fill( 0 ); + + unsigned *idxPtrTU = m_tuIdx[i] + offset; + AreaBuf<uint32_t>( idxPtrTU, scaledSelf.width, scaledBlk.size() ).fill( 0 ); + } + + //pop cu/pu/tus + for( int i = m_numTUs; i > numTu; i-- ) + { + m_tuCache.cache( tus.back() ); + tus.pop_back(); + m_numTUs--; + } + for( int i = m_numPUs; i > numPu; i-- ) + { + m_puCache.cache( pus.back() ); + pus.pop_back(); + m_numPUs--; + } + for( int i = m_numCUs; i > numCu; i-- ) + { + m_cuCache.cache( cus.back() ); + cus.pop_back(); + m_numCUs--; + } + for( int i = 0; i < 3; i++ ) + { + m_offsets[i] = pOffset[i]; + } +} + +CodingUnit* CodingStructure::getLumaCU( const Position &pos ) +{ + const ChannelType effChType = CHANNEL_TYPE_LUMA; + const CompArea &_blk = area.blocks[effChType]; + CHECK( 
!_blk.contains( pos ), "must contain the pos" ); + const unsigned idx = m_cuIdx[effChType][rsAddr( pos, _blk.pos(), _blk.width, unitScale[effChType] )]; + + if( idx != 0 ) return cus[idx - 1]; + else return nullptr; +} CodingUnit* CodingStructure::getCU( const Position &pos, const ChannelType effChType ) { const CompArea &_blk = area.blocks[effChType]; - if( !_blk.contains( pos ) ) + if( !_blk.contains( pos ) || (treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA) ) { + //keep this check, which is helpful to identify bugs + if( treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA ) + { + CHECK( parent == nullptr, "parent shall be valid; consider using function getLumaCU()" ); + CHECK( parent->treeType != TREE_D, "wrong parent treeType " ); + } if( parent ) return parent->getCU( pos, effChType ); else return nullptr; } @@ -207,8 +301,13 @@ const CodingUnit* CodingStructure::getCU( const Position &pos, const ChannelType { const CompArea &_blk = area.blocks[effChType]; - if( !_blk.contains( pos ) ) + if( !_blk.contains( pos ) || (treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA) ) { + if( treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA ) + { + CHECK( parent == nullptr, "parent shall be valid; consider using function getLumaCU()" ); + CHECK( parent->treeType != TREE_D, "wrong parent treeType" ); + } if( parent ) return parent->getCU( pos, effChType ); else return nullptr; } @@ -286,9 +385,11 @@ TransformUnit* CodingStructure::getTU( const Position &pos, const ChannelType ef } else { - while( pos != tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].pos() ) + while( !tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].contains( pos ) ) { extraIdx++; + CHECK( tus[idx - 1 + extraIdx]->cu->treeType == TREE_C, "tu searched by position points to a chroma tree CU" ); + CHECK( extraIdx > 3, "extraIdx > 3" ); } } } @@ -327,9 +428,11 @@ const TransformUnit * CodingStructure::getTU( const Position &pos, const Channel } else { 
- while( pos != tus[idx - 1 + extraIdx]->blocks[effChType].pos() ) + while ( !tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].contains(pos) ) { extraIdx++; + CHECK( tus[idx - 1 + extraIdx]->cu->treeType == TREE_C, "tu searched by position points to a chroma tree CU" ); + CHECK( extraIdx > 3, "extraIdx > 3" ); } } } @@ -355,13 +458,15 @@ CodingUnit& CodingStructure::addCU( const UnitArea &unit, const ChannelType chTy cu->firstTU = nullptr; cu->lastTU = nullptr; cu->chType = chType; + cu->treeType = treeType; + cu->modeType = modeType; CodingUnit *prevCU = m_numCUs > 0 ? cus.back() : nullptr; if( prevCU ) { prevCU->next = cu; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( prevCU->cacheId != cu->cacheId, "Inconsintent cacheId between previous and current CU" ); #endif @@ -405,7 +510,7 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType pu->cs = this; pu->cu = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType ); pu->chType = chType; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( pu->cacheId != pu->cu->cacheId, "Inconsintent cacheId between the PU and assigned CU" ); CHECK( pu->cu->firstPU != nullptr, "Without an RQT the firstPU should be null" ); @@ -416,7 +521,7 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType if( prevPU && prevPU->cu == pu->cu ) { prevPU->next = pu; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( prevPU->cacheId != pu->cacheId, "Inconsintent cacheId between previous and current PU" ); #endif @@ -466,7 +571,7 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c tu->cs = this; tu->cu = m_isTuEnc ? 
cus[0] : getCU( unit.blocks[chType].pos(), chType ); tu->chType = chType; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM if( tu->cu ) CHECK( tu->cacheId != tu->cu->cacheId, "Inconsintent cacheId between the TU and assigned CU" ); @@ -479,7 +584,7 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c { prevTU->next = tu; tu->prev = prevTU; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( prevTU->cacheId != tu->cacheId, "Inconsintent cacheId between previous and current TU" ); #endif @@ -501,12 +606,13 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c TCoeff *coeffs[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; Pel *pcmbuf[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; + bool *runType[5] = { nullptr, nullptr, nullptr, nullptr, nullptr }; uint32_t numCh = ::getNumberValidComponents( area.chromaFormat ); - for( uint32_t i = 0; i < numCh; i++ ) + for (uint32_t i = 0; i < numCh; i++) { - if( !tu->blocks[i].valid() ) + if (!tu->blocks[i].valid()) { continue; } @@ -514,35 +620,39 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c if (i < ::getNumberValidChannels(area.chromaFormat)) { const CompArea &_selfBlk = area.blocks[i]; - const CompArea &_blk = tu-> blocks[i]; + const CompArea &_blk = tu->blocks[i]; - bool isIspTu = tu->cu != nullptr && tu->cu->ispMode && isLuma( _blk.compID ); + bool isIspTu = tu->cu != nullptr && tu->cu->ispMode && isLuma(_blk.compID); bool isFirstIspTu = false; - if( isIspTu ) + if (isIspTu) { - isFirstIspTu = CU::isISPFirst( *tu->cu, _blk, getFirstComponentOfChannel( ChannelType( i ) ) ); + isFirstIspTu = CU::isISPFirst(*tu->cu, _blk, getFirstComponentOfChannel(ChannelType(i))); } - if( !isIspTu || isFirstIspTu ) + if (!isIspTu || isFirstIspTu) { const UnitScale& scale = unitScale[_blk.compID]; - const Area scaledSelf = scale.scale( _selfBlk ); - const 
Area scaledBlk = isIspTu ? scale.scale( tu->cu->blocks[i] ) : scale.scale( _blk ); - unsigned *idxPtr = m_tuIdx[i] + rsAddr( scaledBlk.pos(), scaledSelf.pos(), scaledSelf.width ); - CHECK( *idxPtr, "Overwriting a pre-existing value, should be '0'!" ); - AreaBuf<uint32_t>( idxPtr, scaledSelf.width, scaledBlk.size() ).fill( idx ); + const Area scaledSelf = scale.scale(_selfBlk); + const Area scaledBlk = isIspTu ? scale.scale(tu->cu->blocks[i]) : scale.scale(_blk); + unsigned *idxPtr = m_tuIdx[i] + rsAddr(scaledBlk.pos(), scaledSelf.pos(), scaledSelf.width); + CHECK(*idxPtr, "Overwriting a pre-existing value, should be '0'!"); + AreaBuf<uint32_t>(idxPtr, scaledSelf.width, scaledBlk.size()).fill(idx); } } coeffs[i] = m_coeffs[i] + m_offsets[i]; pcmbuf[i] = m_pcmbuf[i] + m_offsets[i]; + if (i < MAX_NUM_CHANNEL_TYPE) + { + if (m_runType[i] != nullptr) runType[i] = m_runType[i] + m_offsets[i]; + } + unsigned areaSize = tu->blocks[i].area(); m_offsets[i] += areaSize; } - - tu->init( coeffs, pcmbuf ); + tu->init(coeffs, pcmbuf, runType); return *tu; } @@ -551,8 +661,41 @@ CUTraverser CodingStructure::traverseCUs( const UnitArea& unit, const ChannelTyp { CodingUnit* firstCU = getCU( isLuma( effChType ) ? 
unit.lumaPos() : unit.chromaPos(), effChType ); CodingUnit* lastCU = firstCU; - + if( !CS::isDualITree( *this ) ) //for a more generalized separate tree + { + bool bContinue = true; + CodingUnit* currCU = firstCU; + while( bContinue ) + { + if( currCU == nullptr ) + { + bContinue = false; + lastCU = currCU; + } + else if( currCU->chType != effChType ) + { + lastCU = currCU; + currCU = currCU->next; + } + else + { + if( unit.contains( *currCU ) ) + { + lastCU = currCU; + currCU = currCU->next; + } + else + { + bContinue = false; + lastCU = currCU; + } + } + } + } + else + { do { } while( lastCU && ( lastCU = lastCU->next ) && unit.contains( *lastCU ) ); + } return CUTraverser( firstCU, lastCU ); } @@ -621,9 +764,9 @@ void CodingStructure::allocateVectorsAtPicLevel() -void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer) +void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer, const bool isPLTused) { - createInternals( UnitArea( _chromaFormat, _area ), isTopLayer ); + createInternals(UnitArea(_chromaFormat, _area), isTopLayer, isPLTused); if( isTopLayer ) return; @@ -633,9 +776,9 @@ void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _are m_orgr.create( area ); } -void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer) +void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused) { - createInternals( _unit, isTopLayer ); + createInternals(_unit, isTopLayer, isPLTused); if( isTopLayer ) return; @@ -645,7 +788,7 @@ void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer) m_orgr.create( area ); } -void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLayer ) +void CodingStructure::createInternals(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused) { area = _unit; @@ -673,7 +816,7 @@ void CodingStructure::createInternals( const 
UnitArea& _unit, const bool isTopLa m_offsets[i] = 0; } - if( !isTopLayer ) createCoeffs(); + if( !isTopLayer ) createCoeffs(isPLTused); unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area(); m_motionBuf = new MotionInfo[_lumaAreaScaled]; @@ -705,6 +848,66 @@ void CodingStructure::addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> & lut.push_back(mi); } +void CodingStructure::resetPrevPLT(PLTBuf& prevPLT) +{ + for (int comp = 0; comp < MAX_NUM_CHANNEL_TYPE; comp++) + { + prevPLT.curPLTSize[comp] = 0; + } + + for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) + { + memset(prevPLT.curPLT[comp], 0, MAXPLTPREDSIZE * sizeof(Pel)); + } +} + +void CodingStructure::reorderPrevPLT(PLTBuf& prevPLT, uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE], Pel curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE], bool reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE], uint32_t compBegin, uint32_t numComp, bool jointPLT) +{ + Pel stuffedPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; + uint8_t tempCurPLTsize[MAX_NUM_CHANNEL_TYPE]; + uint8_t stuffPLTsize[MAX_NUM_COMPONENT]; + + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + tempCurPLTsize[comID] = curPLTSize[comID]; + stuffPLTsize[i] = 0; + memcpy(stuffedPLT[i], curPLT[i], curPLTSize[comID] * sizeof(Pel)); + } + + for (int ch = compBegin; ch < (compBegin + numComp); ch++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((ch > 0) ? 
COMPONENT_Cb : COMPONENT_Y); + if (ch > 1) break; + for (int i = 0; i < prevPLT.curPLTSize[comID]; i++) + { + if (tempCurPLTsize[comID] + stuffPLTsize[ch] >= MAXPLTPREDSIZE) + break; + + if (!reuseflag[comID][i]) + { + if (ch == COMPONENT_Y) + { + stuffedPLT[0][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[0][i]; + } + else + { + stuffedPLT[1][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[1][i]; + stuffedPLT[2][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[2][i]; + } + stuffPLTsize[ch]++; + } + } + } + + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + prevPLT.curPLTSize[comID] = curPLTSize[comID] + stuffPLTsize[comID]; + memcpy(prevPLT.curPLT[i], stuffedPLT[i], prevPLT.curPLTSize[comID] * sizeof(Pel)); + } +} + void CodingStructure::rebindPicBufs() { CHECK( parent, "rebindPicBufs can only be used for the top level CodingStructure" ); @@ -722,7 +925,7 @@ void CodingStructure::rebindPicBufs() } } -void CodingStructure::createCoeffs() +void CodingStructure::createCoeffs(const bool isPLTused) { const unsigned numCh = getNumberValidComponents( area.chromaFormat ); @@ -733,6 +936,16 @@ void CodingStructure::createCoeffs() m_coeffs[i] = _area > 0 ? ( TCoeff* ) xMalloc( TCoeff, _area ) : nullptr; m_pcmbuf[i] = _area > 0 ? ( Pel* ) xMalloc( Pel, _area ) : nullptr; } + + if (isPLTused) + { + for (unsigned i = 0; i < numCh - 1; i++) + { + unsigned _area = area.blocks[i].area(); + + m_runType[i] = _area > 0 ? 
(bool*)xMalloc(bool, _area) : nullptr; + } + } } void CodingStructure::destroyCoeffs() @@ -742,6 +955,11 @@ void CodingStructure::destroyCoeffs() if( m_coeffs[i] ) { xFree( m_coeffs[i] ); m_coeffs[i] = nullptr; } if( m_pcmbuf[i] ) { xFree( m_pcmbuf[i] ); m_pcmbuf[i] = nullptr; } } + + for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) + { + if (m_runType[i]) { xFree(m_runType[i]); m_runType[i] = nullptr; } + } } void CodingStructure::initSubStructure( CodingStructure& subStruct, const ChannelType _chType, const UnitArea &subArea, const bool &isTuEnc ) @@ -768,11 +986,14 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe subStruct.picture = picture; subStruct.sps = sps; -#if HEVC_VPS subStruct.vps = vps; -#endif subStruct.pps = pps; - subStruct.aps = aps; + subStruct.picHeader = picHeader; + memcpy(subStruct.alfApss, alfApss, sizeof(alfApss)); + + subStruct.lmcsAps = lmcsAps; + subStruct.scalinglistAps = scalinglistAps; + subStruct.slice = slice; subStruct.baseQP = baseQP; subStruct.prevQP[_chType] @@ -783,7 +1004,12 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe subStruct.motionLut = motionLut; - subStruct.initStructData( currQP[_chType], isLossless ); + subStruct.prevPLT = prevPLT; + + subStruct.treeType = treeType; + subStruct.modeType = modeType; + + subStruct.initStructData( currQP[_chType] ); if( isTuEnc ) { @@ -844,75 +1070,8 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C motionLut = subStruct.motionLut; } -#if ENABLE_WPP_PARALLELISM - - if( nullptr == parent ) - { -#pragma omp critical - { - fracBits += subStruct.fracBits; - dist += subStruct.dist; - cost += subStruct.cost; - costDbOffset += subStruct.costDbOffset; - if( parent ) - { - // allow this to be false at the top level - CHECKD( !area.contains( subArea ), "Trying to use a sub-structure not contained in self" ); - } - - // copy the CUs over - if( subStruct.m_isTuEnc ) - { - // don't copy if the 
substruct was created for encoding of the TUs - } - else - { - for( const auto &pcu : subStruct.cus ) - { - // add an analogue CU into own CU store - const UnitArea &cuPatch = *pcu; - - CodingUnit &cu = addCU( cuPatch, chType ); - - // copy the CU info from subPatch - cu = *pcu; - } - } - - // copy the PUs over - if( subStruct.m_isTuEnc ) - { - // don't copy if the substruct was created for encoding of the TUs - } - else - { - for( const auto &ppu : subStruct.pus ) - { - // add an analogue PU into own PU store - const UnitArea &puPatch = *ppu; + prevPLT = subStruct.prevPLT; - PredictionUnit &pu = addPU( puPatch, chType ); - - // copy the PU info from subPatch - pu = *ppu; - } - } - // copy the TUs over - for( const auto &ptu : subStruct.tus ) - { - // add an analogue TU into own TU store - const UnitArea &tuPatch = *ptu; - - TransformUnit &tu = addTU( tuPatch, chType ); - - // copy the TU info from subPatch - tu = *ptu; - } - } - - return; - } -#endif fracBits += subStruct.fracBits; dist += subStruct.dist; @@ -935,8 +1094,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C { // add an analogue CU into own CU store const UnitArea &cuPatch = *pcu; - - CodingUnit &cu = addCU( cuPatch, chType ); + CodingUnit &cu = addCU( cuPatch, pcu->chType ); // copy the CU info from subPatch cu = *pcu; @@ -954,8 +1112,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C { // add an analogue PU into own PU store const UnitArea &puPatch = *ppu; - - PredictionUnit &pu = addPU( puPatch, chType ); + PredictionUnit &pu = addPU( puPatch, ppu->chType ); // copy the PU info from subPatch pu = *ppu; @@ -966,8 +1123,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C { // add an analogue TU into own TU store const UnitArea &tuPatch = *ptu; - - TransformUnit &tu = addTU( tuPatch, chType ); + TransformUnit &tu = addTU( tuPatch, ptu->chType ); // copy the TU info from subPatch tu = *ptu; @@ 
-1026,6 +1182,7 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel motionLut = other.motionLut; } + prevPLT = other.prevPLT; if( copyTUs ) { @@ -1079,7 +1236,7 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel } } -void CodingStructure::initStructData( const int &QP, const bool &_isLosses, const bool &skipMotBuf ) +void CodingStructure::initStructData( const int &QP, const bool &skipMotBuf ) { clearPUs(); clearTUs(); @@ -1088,7 +1245,6 @@ void CodingStructure::initStructData( const int &QP, const bool &_isLosses, cons if( QP < MAX_INT ) { currQP[0] = currQP[1] = QP; - isLossless = _isLosses; } if (!skipMotBuf && (!parent || ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && !m_isTuEnc))) @@ -1338,15 +1494,14 @@ const CPelUnitBuf CodingStructure::getBuf( const UnitArea &unit, const PictureTy const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const CodingUnit& curCu, const ChannelType _chType ) const { const CodingUnit* cu = getCU( pos, _chType ); -#if HEVC_TILES_WPP // exists same slice and tile cu precedes curCu in encoding order // (thus, is either from parent CS in RD-search or its index is lower) - if( cu && CU::isSameSliceAndTile( *cu, curCu ) && ( cu->cs != curCu.cs || cu->idx <= curCu.idx ) ) -#else - // exists same slice cu precedes curCu in encoding order - // (thus, is either from parent CS in RD-search or its index is lower) - if(cu && CU::isSameSlice(*cu, curCu) && (cu->cs != curCu.cs || cu->idx <= curCu.idx)) -#endif + const bool wavefrontsEnabled = curCu.slice->getPPS()->getEntropyCodingSyncEnabledFlag(); + int ctuSizeBit = floorLog2(curCu.cs->sps->getMaxCUWidth()); + int xNbY = pos.x << getChannelTypeScaleX( _chType, curCu.chromaFormat ); + int xCurr = curCu.blocks[_chType].x << getChannelTypeScaleX( _chType, curCu.chromaFormat ); + bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? 
false : true; + if( cu && CU::isSameSliceAndTile( *cu, curCu ) && ( cu->cs != curCu.cs || cu->idx <= curCu.idx ) && addCheck) { return cu; } @@ -1356,32 +1511,28 @@ const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const C } } -#if HEVC_TILES_WPP -const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType ) const +const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const Position curPos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType ) const { const CodingUnit* cu = getCU( pos, _chType ); - return ( cu && cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx ) ? cu : nullptr; -} -#else -const CodingUnit* CodingStructure::getCURestricted(const Position &pos, const unsigned curSliceIdx, const ChannelType _chType) const -{ - const CodingUnit* cu = getCU(pos, _chType); - return (cu && cu->slice->getIndependentSliceIdx() == curSliceIdx ) ? cu : nullptr; + const bool wavefrontsEnabled = this->slice->getPPS()->getEntropyCodingSyncEnabledFlag(); + int ctuSizeBit = floorLog2(this->sps->getMaxCUWidth()); + int xNbY = pos.x << getChannelTypeScaleX( _chType, this->area.chromaFormat ); + int xCurr = curPos.x << getChannelTypeScaleX( _chType, this->area.chromaFormat ); + bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? false : true; + return ( cu && cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx && addCheck ) ? 
cu : nullptr; } -#endif const PredictionUnit* CodingStructure::getPURestricted( const Position &pos, const PredictionUnit& curPu, const ChannelType _chType ) const { const PredictionUnit* pu = getPU( pos, _chType ); -#if HEVC_TILES_WPP // exists same slice and tile pu precedes curPu in encoding order // (thus, is either from parent CS in RD-search or its index is lower) - if( pu && CU::isSameSliceAndTile( *pu->cu, *curPu.cu ) && ( pu->cs != curPu.cs || pu->idx <= curPu.idx ) ) -#else - // exists same slice pu precedes curPu in encoding order - // (thus, is either from parent CS in RD-search or its index is lower) - if(pu && CU::isSameSlice(*pu->cu, *curPu.cu) && (pu->cs != curPu.cs || pu->idx <= curPu.idx)) -#endif + const bool wavefrontsEnabled = curPu.cu->slice->getPPS()->getEntropyCodingSyncEnabledFlag(); + int ctuSizeBit = floorLog2(curPu.cs->sps->getMaxCUWidth()); + int xNbY = pos.x << getChannelTypeScaleX( _chType, curPu.chromaFormat ); + int xCurr = curPu.blocks[_chType].x << getChannelTypeScaleX( _chType, curPu.chromaFormat ); + bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? 
false : true; + if( pu && CU::isSameSliceAndTile( *pu->cu, *curPu.cu ) && ( pu->cs != curPu.cs || pu->idx <= curPu.idx ) && addCheck ) { return pu; } @@ -1394,15 +1545,14 @@ const PredictionUnit* CodingStructure::getPURestricted( const Position &pos, con const TransformUnit* CodingStructure::getTURestricted( const Position &pos, const TransformUnit& curTu, const ChannelType _chType ) const { const TransformUnit* tu = getTU( pos, _chType ); -#if HEVC_TILES_WPP // exists same slice and tile tu precedes curTu in encoding order // (thus, is either from parent CS in RD-search or its index is lower) - if( tu && CU::isSameSliceAndTile( *tu->cu, *curTu.cu ) && ( tu->cs != curTu.cs || tu->idx <= curTu.idx ) ) -#else - // exists same slice tu precedes curTu in encoding order - // (thus, is either from parent CS in RD-search or its index is lower) - if(tu && CU::isSameSlice(*tu->cu, *curTu.cu) && (tu->cs != curTu.cs || tu->idx <= curTu.idx)) -#endif + const bool wavefrontsEnabled = curTu.cu->slice->getPPS()->getEntropyCodingSyncEnabledFlag(); + int ctuSizeBit = floorLog2(curTu.cs->sps->getMaxCUWidth()); + int xNbY = pos.x << getChannelTypeScaleX( _chType, curTu.chromaFormat ); + int xCurr = curTu.blocks[_chType].x << getChannelTypeScaleX( _chType, curTu.chromaFormat ); + bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? 
false : true; + if( tu && CU::isSameSliceAndTile( *tu->cu, *curTu.cu ) && ( tu->cs != curTu.cs || tu->idx <= curTu.idx ) && addCheck ) { return tu; } @@ -1412,36 +1562,3 @@ const TransformUnit* CodingStructure::getTURestricted( const Position &pos, cons } } -IbcLumaCoverage CodingStructure::getIbcLumaCoverage(const CompArea& chromaArea) const -{ - CHECK(chType != CHANNEL_TYPE_CHROMA, "Error"); - - const unsigned int unitAreaSubBlock = MIN_PU_SIZE * MIN_PU_SIZE; - CompArea lumaArea = CompArea(COMPONENT_Y, chromaArea.chromaFormat, chromaArea.lumaPos(), recalcSize(chromaArea.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size())); - lumaArea = clipArea(lumaArea, picture->block(COMPONENT_Y)); - const unsigned int fullArea = lumaArea.area(); - unsigned int ibcArea = 0; - for (SizeType y = 0; y < lumaArea.height; y += MIN_PU_SIZE) - { - for (SizeType x = 0; x < lumaArea.width; x += MIN_PU_SIZE) - { - Position pos = lumaArea.offset(x, y); - if (picture->cs->getMotionInfo(pos).isInter) // need to change if inter slice allows dualtree - { - ibcArea += unitAreaSubBlock; - } - } - } - - IbcLumaCoverage coverage = IBC_LUMA_COVERAGE_FULL; - if (ibcArea == 0) - { - coverage = IBC_LUMA_COVERAGE_NONE; - } - else if (ibcArea < fullArea) - { - coverage = IBC_LUMA_COVERAGE_PARTIAL; - } - - return coverage; -} diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index 99556f67dccb85c6a7167cac72e6fb1c3e162d34..317e330ba33e86f62432527e1ab5182a0c0ad072 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -57,15 +57,11 @@ enum PictureType PIC_PREDICTION, PIC_RESIDUAL, PIC_ORG_RESI, + PIC_RECON_WRAP, + PIC_ORIGINAL_INPUT, + PIC_TRUE_ORIGINAL_INPUT, NUM_PIC_TYPES }; -enum IbcLumaCoverage -{ - IBC_LUMA_COVERAGE_FULL = 0, - IBC_LUMA_COVERAGE_PARTIAL, - IBC_LUMA_COVERAGE_NONE, - NUM_IBC_LUMA_COVERAGE, -}; extern XUCache g_globalUnitCache; // --------------------------------------------------------------------------- @@ -84,31 +80,31 @@ public: Slice *slice; UnitScale unitScale[MAX_NUM_COMPONENT]; - ChannelType chType; int baseQP; int prevQP[MAX_NUM_CHANNEL_TYPE]; int currQP[MAX_NUM_CHANNEL_TYPE]; int chromaQpAdj; - Position sharedBndPos; - Size sharedBndSize; - bool isLossless; const SPS *sps; const PPS *pps; - APS * aps; -#if HEVC_VPS + PicHeader *picHeader; + APS* alfApss[ALF_CTB_MAX_NUM_APS]; + APS * lmcsAps; + APS * scalinglistAps; const VPS *vps; -#endif const PreCalcValues* pcv; CodingStructure(CUCache&, PUCache&, TUCache&); - void create( const UnitArea &_unit, const bool isTopLayer ); - void create( const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer ); + + void create(const UnitArea &_unit, const bool isTopLayer, const bool isPLTused); + void create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer, const bool isPLTused); + void destroy(); void releaseIntermediateData(); void rebindPicBufs(); - void createCoeffs(); + + void createCoeffs(const bool isPLTused); void destroyCoeffs(); void allocateVectorsAtPicLevel(); @@ -127,6 +123,7 @@ public: const TransformUnit *getTU(const Position &pos, const ChannelType _chType, const int subTuIdx = -1) const; CodingUnit *getCU(const Position &pos, const ChannelType _chType); + CodingUnit *getLumaCU( const Position &pos ); PredictionUnit *getPU(const Position &pos, const ChannelType _chType); TransformUnit *getTU(const Position &pos, const ChannelType _chType, const int subTuIdx = -1); @@ -138,11 +135,7 
@@ public: PredictionUnit *getPU(const ChannelType &_chType ) { return getPU(area.blocks[_chType].pos(), _chType); } TransformUnit *getTU(const ChannelType &_chType ) { return getTU(area.blocks[_chType].pos(), _chType); } -#if HEVC_TILES_WPP - const CodingUnit *getCURestricted(const Position &pos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType) const; -#else - const CodingUnit *getCURestricted(const Position &pos, const unsigned curSliceIdx, const ChannelType _chType) const; -#endif + const CodingUnit *getCURestricted(const Position &pos, const Position curPos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType) const; const CodingUnit *getCURestricted(const Position &pos, const CodingUnit& curCu, const ChannelType _chType) const; const PredictionUnit *getPURestricted(const Position &pos, const PredictionUnit& curPu, const ChannelType _chType) const; const TransformUnit *getTURestricted(const Position &pos, const TransformUnit& curTu, const ChannelType _chType) const; @@ -158,7 +151,6 @@ public: cCUTraverser traverseCUs(const UnitArea& _unit, const ChannelType _chType) const; cPUTraverser traversePUs(const UnitArea& _unit, const ChannelType _chType) const; cTUTraverser traverseTUs(const UnitArea& _unit, const ChannelType _chType) const; - IbcLumaCoverage getIbcLumaCoverage(const CompArea& chromaArea) const; // --------------------------------------------------------------------------- // encoding search utilities // --------------------------------------------------------------------------- @@ -172,8 +164,10 @@ public: uint64_t fracBits; Distortion dist; Distortion interHad; + TreeType treeType; //because partitioner can not go deep to tu and cu coding (e.g., addCU()), need another variable for indicating treeType + ModeType modeType; - void initStructData (const int &QP = MAX_INT, const bool &_isLosses = false, const bool &skipMotBuf = false); + void initStructData (const int &QP = MAX_INT, const bool 
&skipMotBuf = false); void initSubStructure( CodingStructure& cs, const ChannelType chType, const UnitArea &subArea, const bool &isTuEnc); void copyStructure (const CodingStructure& cs, const ChannelType chType, const bool copyTUs = false, const bool copyRecoBuffer = false); @@ -183,10 +177,17 @@ public: void clearTUs(); void clearPUs(); void clearCUs(); + const int signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const; + void clearCuPuTuIdxMap ( const UnitArea &_area, uint32_t numCu, uint32_t numPu, uint32_t numTu, uint32_t* pOffset ); + void getNumCuPuTuOffset ( uint32_t* pArray ) + { + pArray[0] = m_numCUs; pArray[1] = m_numPUs; pArray[2] = m_numTUs; + pArray[3] = m_offsets[0]; pArray[4] = m_offsets[1]; pArray[5] = m_offsets[2]; + } private: - void createInternals(const UnitArea& _unit, const bool isTopLayer); + void createInternals(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused); public: @@ -198,6 +199,10 @@ public: void addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS>& lut, const MotionInfo &mi); + PLTBuf prevPLT; + void resetPrevPLT(PLTBuf& prevPLT); + void reorderPrevPLT(PLTBuf& prevPLT, uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE], Pel curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE], bool reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE], uint32_t compBegin, uint32_t numComp, bool jointPLT); + private: // needed for TU encoding @@ -225,12 +230,18 @@ private: TCoeff *m_coeffs [ MAX_NUM_COMPONENT ]; Pel *m_pcmbuf [ MAX_NUM_COMPONENT ]; - + bool *m_runType[ MAX_NUM_CHANNEL_TYPE ]; int m_offsets[ MAX_NUM_COMPONENT ]; MotionInfo *m_motionBuf; public: + CodingStructure *bestParent; + double tmpColorSpaceCost; + bool firstColorSpaceSelected; + double tmpColorSpaceIntraCost[2]; + bool firstColorSpaceTestOnly; + bool resetIBCBuffer; MotionBuf getMotionBuf( const Area& _area ); MotionBuf getMotionBuf( const UnitArea& _area ) { return getMotionBuf( _area.Y() ); } @@ -262,6 +273,7 @@ public: const CPelBuf 
getRecoBuf(const CompArea &blk) const; PelUnitBuf getRecoBuf(const UnitArea &unit); const CPelUnitBuf getRecoBuf(const UnitArea &unit) const; + PelUnitBuf& getRecoBufRef() { return m_reco; } PelBuf getOrgResiBuf(const CompArea &blk); const CPelBuf getOrgResiBuf(const CompArea &blk) const; @@ -314,8 +326,5 @@ private: static inline uint32_t getNumberValidTBlocks(const PreCalcValues& pcv) { return (pcv.chrFormat==CHROMA_400) ? 1 : ( pcv.multiBlock422 ? MAX_NUM_TBLOCKS : MAX_NUM_COMPONENT ); } -inline unsigned toWSizeIdx( const CodingStructure* cs ) { return gp_sizeIdxInfo->idxFrom( cs->area.lwidth() ); } -inline unsigned toHSizeIdx( const CodingStructure* cs ) { return gp_sizeIdxInfo->idxFrom( cs->area.lheight() ); } - #endif diff --git a/source/Lib/CommonLib/Common.h b/source/Lib/CommonLib/Common.h index 81c92278b2bd60a59c558424bcdbedf0f1defb5b..9d30b8393ea28a4b619d2c3b9d3d2d256b33bcd3 100644 --- a/source/Lib/CommonLib/Common.h +++ b/source/Lib/CommonLib/Common.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 6d37b49542cebf6ca0ff73f6b19249b37067d5fb..f24085c106c07b6c6028cd2bd81a9fcccf32d1f8 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,6 +58,8 @@ #if _MSC_VER < 1900 #error "MS Visual Studio version not supported. 
Please upgrade to Visual Studio 2015 or higher (or use other compilers)" #endif + +#include <intrin.h> #endif //! \ingroup CommonLib @@ -143,11 +145,13 @@ static const int MAX_NUM_REF = 16; ///< max. static const int MAX_QP = 63; static const int NOT_VALID = -1; + static const int AMVP_MAX_NUM_CANDS = 2; ///< AMVP: advanced motion vector prediction - max number of final candidates static const int AMVP_MAX_NUM_CANDS_MEM = 3; ///< AMVP: advanced motion vector prediction - max number of candidates static const int AMVP_DECIMATION_FACTOR = 2; static const int MRG_MAX_NUM_CANDS = 6; ///< MERGE static const int AFFINE_MRG_MAX_NUM_CANDS = 5; ///< AFFINE MERGE +static const int IBC_MRG_MAX_NUM_CANDS = 6; ///< IBC MERGE static const int MAX_TLAYER = 7; ///< Explicit temporal layer QP offset - max number of temporal layer @@ -164,15 +168,33 @@ static const int MAX_NUM_PICS_IN_SOP = 1024; static const int MAX_NESTING_NUM_OPS = 1024; static const int MAX_NESTING_NUM_LAYER = 64; -#if HEVC_VPS static const int MAX_VPS_NUM_HRD_PARAMETERS = 1; -static const int MAX_VPS_OP_SETS_PLUS1 = 1024; -static const int MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1 = 1; -#endif - +static const int MAX_VPS_LAYERS = 64; +static const int MAX_VPS_SUBLAYERS = 7; +static const int MAX_NUM_REF_LAYERS = 7; +static const int MAX_NUM_OLSS = 256; +static const int MAX_VPS_OLS_MODE_IDC = 2; static const int MAXIMUM_INTRA_FILTERED_WIDTH = 16; static const int MAXIMUM_INTRA_FILTERED_HEIGHT = 16; +static const int MIP_MAX_WIDTH = MAX_TB_SIZEY; +static const int MIP_MAX_HEIGHT = MAX_TB_SIZEY; + +static const int MAX_NUM_ALF_ALTERNATIVES_CHROMA = 8; +static const int MAX_NUM_ALF_CLASSES = 25; +static const int MAX_NUM_ALF_LUMA_COEFF = 13; +static const int MAX_NUM_ALF_CHROMA_COEFF = 7; +static const int MAX_ALF_FILTER_LENGTH = 7; +static const int MAX_NUM_ALF_COEFF = MAX_ALF_FILTER_LENGTH * MAX_ALF_FILTER_LENGTH / 2 + 1; +static const int MAX_ALF_PADDING_SIZE = 4; + +static const int ALF_FIXED_FILTER_NUM = 64; 
+static const int ALF_CTB_MAX_NUM_APS = 8; +static const int NUM_FIXED_FILTER_SETS = 16; +static const int NUM_TOTAL_FILTER_SETS = NUM_FIXED_FILTER_SETS + ALF_CTB_MAX_NUM_APS; + + +static const int MAX_BDOF_APPLICATION_REGION = 16; static const int MAX_CPB_CNT = 32; ///< Upper bound of (cpb_cnt_minus1 + 1) static const int MAX_NUM_LAYER_IDS = 64; @@ -182,31 +204,37 @@ static const int CU_DQP_EG_k = 0; ///< expg static const int SBH_THRESHOLD = 4; ///< value of the fixed SBH controlling threshold -static const int C1FLAG_NUMBER = 8; ///< maximum number of largerThan1 flag coded in one chunk: 16 in HM5 -static const int C2FLAG_NUMBER = 1; ///< maximum number of largerThan2 flag coded in one chunk: 16 in HM5 - static const int MAX_NUM_VPS = 16; +static const int MAX_NUM_DPS = 16; static const int MAX_NUM_SPS = 16; static const int MAX_NUM_PPS = 64; static const int MAX_NUM_APS = 32; //Currently APS ID has 5 bits +static const int NUM_APS_TYPE_LEN = 3; //Currently APS Type has 3 bits +static const int MAX_NUM_APS_TYPE = 8; //Currently APS Type has 3 bits so the max type is 8 +static const int MAX_TILE_COLS = 20; ///< Maximum number of tile columns +static const int MAX_TILE_ROWS = 22; ///< Maximum number of tile rows +static const int MAX_TILES = MAX_TILE_COLS * MAX_TILE_ROWS; ///< Maximum number of tiles +static const int MAX_SLICES = 600; ///< Maximum number of slices per picture static const int MLS_GRP_NUM = 1024; ///< Max number of coefficient groups, max(16, 256) static const int MLS_CG_SIZE = 4; ///< Coefficient group size of 4x4; = MLS_CG_LOG2_WIDTH + MLS_CG_LOG2_HEIGHT -static const int ADJ_QUANT_SHIFT = 7; -static const int ADJ_DEQUANT_SHIFT = ( ADJ_QUANT_SHIFT + 1 ); static const int RVM_VCEGAM10_M = 4; static const int MAX_REF_LINE_IDX = 3; //highest refLine offset in the list static const int MRL_NUM_REF_LINES = 3; //number of candidates in the array -static const int MULTI_REF_LINE_IDX[4] = { 0, 1, 3, 0 }; +static const int MULTI_REF_LINE_IDX[4] = { 0, 1, 
2, 0 }; + +static const int PRED_REG_MIN_WIDTH = 4; // Minimum prediction region width for ISP subblocks static const int NUM_LUMA_MODE = 67; ///< Planar + DC + 65 directional mode (4*16 + 1) static const int NUM_LMC_MODE = 1 + 2; ///< LMC + MDLM_T + MDLM_L static const int NUM_INTRA_MODE = (NUM_LUMA_MODE + NUM_LMC_MODE); +static const int NUM_EXT_LUMA_MODE = 28; + static const int NUM_DIR = (((NUM_LUMA_MODE - 3) >> 2) + 1); static const int PLANAR_IDX = 0; ///< index for intra PLANAR mode static const int DC_IDX = 1; ///< index for intra DC mode @@ -214,6 +242,7 @@ static const int HOR_IDX = (1 * (NUM_DIR - 1) + 2); ///< inde static const int DIA_IDX = (2 * (NUM_DIR - 1) + 2); ///< index for intra DIAGONAL mode static const int VER_IDX = (3 * (NUM_DIR - 1) + 2); ///< index for intra VERTICAL mode static const int VDIA_IDX = (4 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL mode +static const int BDPCM_IDX = (5 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL mode static const int NOMODE_IDX = MAX_UCHAR; ///< indicating uninitialized elements static const int NUM_CHROMA_MODE = (5 + NUM_LMC_MODE); ///< total number of chroma modes @@ -222,21 +251,21 @@ static const int MDLM_L_IDX = LM_CHROMA_IDX + 1; ///< M static const int MDLM_T_IDX = LM_CHROMA_IDX + 2; ///< MDLM_T static const int DM_CHROMA_IDX = NUM_INTRA_MODE; ///< chroma mode index for derived from luma intra mode -static const uint8_t INTER_MODE_IDX = 255; ///< index for inter modes - static const uint32_t NUM_TRAFO_MODES_MTS = 6; ///< Max Intra CU size applying EMT, supported values: 8, 16, 32, 64, 128 static const uint32_t MTS_INTRA_MAX_CU_SIZE = 32; ///< Max Intra CU size applying EMT, supported values: 8, 16, 32, 64, 128 static const uint32_t MTS_INTER_MAX_CU_SIZE = 32; ///< Max Inter CU size applying EMT, supported values: 8, 16, 32, 64, 128 static const int NUM_MOST_PROBABLE_MODES = 6; static const int LM_SYMBOL_NUM = (1 + NUM_LMC_MODE); -static const int FAST_UDI_MAX_RDMODE_NUM = 
NUM_LUMA_MODE; ///< maximum number of RD comparison in fast-UDI estimation loop +static const int MAX_NUM_MIP_MODE = 32; ///< maximum number of MIP pred. modes +static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE); ///< maximum number of RD comparison in fast-UDI estimation loop -static const int MDCS_ANGLE_LIMIT = 9; ///< 0 = Horizontal/vertical only, 1 = Horizontal/vertical +/- 1, 2 = Horizontal/vertical +/- 2 etc... +static const int MAX_LFNST_COEF_NUM = 16; -static const int MDCS_MAXIMUM_WIDTH = 8; ///< (measured in pixels) TUs with width greater than this can only use diagonal scan -static const int MDCS_MAXIMUM_HEIGHT = 8; ///< (measured in pixels) TUs with height greater than this can only use diagonal scan +static const int LFNST_LAST_SIG_LUMA = 1; +static const int LFNST_LAST_SIG_CHROMA = 1; +static const int NUM_LFNST_NUM_PER_SET = 3; static const int LOG2_MAX_NUM_COLUMNS_MINUS1 = 7; static const int LOG2_MAX_NUM_ROWS_MINUS1 = 7; @@ -250,18 +279,20 @@ static const int LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL = 1 << MV static const int LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS = 1 << MV_FRACTIONAL_BITS_INTERNAL; static const int CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS = 1 << (MV_FRACTIONAL_BITS_INTERNAL + 1); +static const int MAX_NUM_SUB_PICS = 255; static const int MAX_NUM_LONG_TERM_REF_PICS = 33; static const int NUM_LONG_TERM_REF_PIC_SPS = 0; static const int MAX_QP_OFFSET_LIST_SIZE = 6; ///< Maximum size of QP offset list is 6 entries +static const int MAX_NUM_CQP_MAPPING_TABLES = 3; ///< Maximum number of chroma QP mapping tables (Cb, Cr and joint Cb-Cr) +static const int MIN_QP_VALUE_FOR_16_BIT = -48; ////< Minimum value for QP (-6*(bitdepth - 8) ) for bit depth 16 ; actual minimum QP value is bit depth dependent +static const int MAX_NUM_QP_VALUES = MAX_QP + 1 - MIN_QP_VALUE_FOR_16_BIT; ////< Maximum number of QP values possible - bit depth dependent // Cost mode support static const int 
LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP = 0; ///< QP to use for lossless coding. static const int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME =4; ///< QP' to use for mixed_lossy_lossless coding. -static const int CR_FROM_CB_REG_COST_SHIFT = 9; - static const int RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS = 4; static const int RExt__PREDICTION_WEIGHTING_ANALYSIS_DC_PRECISION = 0; ///< Additional fixed bit precision used during encoder-side weighting prediction analysis. Currently only used when high_precision_prediction_weighting_flag is set, for backwards compatibility reasons. @@ -273,7 +304,8 @@ static const int MAX_CU_SIZE = 1<<MAX_CU_DEPTH; static const int MIN_CU_LOG2 = 2; static const int MIN_PU_SIZE = 4; static const int MAX_NUM_PARTS_IN_CTU = ( ( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 ) ); -static const int MAX_LOG2_DIFF_CU_TR_SIZE = 2; +static const int MAX_NUM_TUS = 16; ///< Maximum number of TUs within one CU. When max TB size is 32x32, up to 16 TUs within one CU (128x128) is supported +static const int MAX_LOG2_DIFF_CU_TR_SIZE = 3; static const int MAX_CU_TILING_PARTITIONS = 1 << ( MAX_LOG2_DIFF_CU_TR_SIZE << 1 ); static const int JVET_C0024_ZERO_OUT_TH = 32; @@ -283,9 +315,12 @@ static const int SCALING_LIST_REM_NUM = 6; static const int QUANT_SHIFT = 14; ///< Q(4) = 2^14 static const int IQUANT_SHIFT = 6; -static const int SCALE_BITS = 15; ///< Precision for fractional bit estimates -static const int SCALING_LIST_NUM = MAX_NUM_COMPONENT * NUMBER_OF_PREDICTION_MODES; ///< list number for quantization matrix +static constexpr int SCALE_BITS = 15; // Precision for fractional bit estimates +static constexpr double FRAC_BITS_SCALE = 1.0 / (1 << SCALE_BITS); + +static constexpr int SCALING_LIST_PRED_MODES = 2; +static const int SCALING_LIST_NUM = MAX_NUM_COMPONENT * SCALING_LIST_PRED_MODES; ///< list number for quantization matrix static const int SCALING_LIST_START_VALUE = 8; ///< start value for dpcm mode static const int 
MAX_MATRIX_COEF_NUM = 64; ///< max coefficient number for quantization matrix @@ -294,13 +329,10 @@ static const int SCALING_LIST_BITS = 8; ///< bit static const int LOG2_SCALING_LIST_NEUTRAL_VALUE = 4; ///< log2 of the value that, when used in a scaling list, has no effect on quantisation static const int SCALING_LIST_DC = 16; ///< default DC value -static const int CONTEXT_STATE_BITS = 6; static const int LAST_SIGNIFICANT_GROUPS = 14; -static const int MAX_GR_ORDER_RESIDUAL = 10; static const int AFFINE_MIN_BLOCK_SIZE = 4; ///< Minimum affine MC block size - static const int MMVD_REFINE_STEP = 8; ///< max number of distance step static const int MMVD_MAX_REFINE_NUM = (MMVD_REFINE_STEP * 4); ///< max number of candidate from a base candidate static const int MMVD_BASE_MV_NUM = 2; ///< max number of base candidate @@ -308,20 +340,23 @@ static const int MMVD_ADD_NUM = (MMVD_MAX_RE static const int MMVD_MRG_MAX_RD_NUM = MRG_MAX_NUM_CANDS; static const int MMVD_MRG_MAX_RD_BUF_NUM = (MMVD_MRG_MAX_RD_NUM + 1);///< increase buffer size by 1 -static const int MAX_NUM_REG_BINS_4x4SUBBLOCK = 32; ///< max number of context-coded bins (incl. gt2 bins) per 4x4 subblock -static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK = 4; ///< max number of gt2 bins per 4x4 subblock -static const int MAX_NUM_REG_BINS_2x2SUBBLOCK = 8; ///< max number of context-coded bins (incl. 
gt2 bins) per 2x2 subblock (chroma) -static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK = 2; ///< max number of gt2 bins per 2x2 subblock (chroma) +static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA = 28; +static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA = 28; static const int BIO_EXTEND_SIZE = 1; static const int BIO_TEMP_BUFFER_SIZE = (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE); -static const int GBI_NUM = 5; ///< the number of weight options -static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5 -static const int GBI_SIZE_CONSTRAINT = 256; ///< disabling GBi if cu size is smaller than 256 +static const int PROF_BORDER_EXT_W = 1; +static const int PROF_BORDER_EXT_H = 1; +static const int BCW_NUM = 5; ///< the number of weight options +static const int BCW_DEFAULT = ((uint8_t)(BCW_NUM >> 1)); ///< Default weighting index representing for w=0.5 +static const int BCW_SIZE_CONSTRAINT = 256; ///< disabling Bcw if cu size is smaller than 256 static const int MAX_NUM_HMVP_CANDS = (MRG_MAX_NUM_CANDS-1); ///< maximum number of HMVP candidates to be stored and used in merge list static const int MAX_NUM_HMVP_AVMPCANDS = 4; ///< maximum number of HMVP candidates to be used in AMVP list +static const int ALF_VB_POS_ABOVE_CTUROW_LUMA = 4; +static const int ALF_VB_POS_ABOVE_CTUROW_CHMA = 2; + #if W0038_DB_OPT static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ; #endif @@ -341,25 +376,20 @@ static const int DMVR_NUM_ITERATION = 2; //for I slice luma CTB configuration para. 
static const int MAX_BT_DEPTH = 4; ///< <=7 static const int MAX_BT_SIZE = 32; ///< [1<<MIN_QT_SIZE, 1<<CTU_LOG2] -static const int MIN_BT_SIZE = 4; ///< can be set down to 1<<MIN_CU_LOG2 static const int MAX_TT_SIZE = 32; ///< [1<<MIN_QT_SIZE, 1<<CTU_LOG2] static const int MAX_TT_SIZE_C = 32; ///< [1<<MIN_QT_SIZE, 1<<CTU_LOG2] -static const int MIN_TT_SIZE = 4; ///< can be set down to 1<<MIN_CU_LOG2 -static const int MIN_TT_SIZE_C = 4; ///< can be set down to 1<<MIN_CU_LOG2 //for P/B slice CTU config. para. static const int MAX_BT_DEPTH_INTER = 4; ///< <=7 static const int MAX_BT_SIZE_INTER = 128; ///< for initialization, [1<<MIN_BT_SIZE_INTER, 1<<CTU_LOG2] -static const int MIN_BT_SIZE_INTER = 4; ///< //for I slice chroma CTB configuration para. (in luma samples) static const int MAX_BT_DEPTH_C = 0; ///< <=7 static const int MAX_BT_SIZE_C = 64; ///< [1<<MIN_QT_SIZE_C, 1<<CTU_LOG2], in luma samples -static const int MIN_BT_SIZE_C = 4; ///< can be set down to 4, in luma samples static const int MAX_TT_SIZE_INTER = 64; ///< for initialization, [1<<MIN_CU_LOG2, 64] -static const int MIN_TT_SIZE_INTER = 4; ///< - +static const int MIN_DUALTREE_CHROMA_WIDTH = 4; +static const int MIN_DUALTREE_CHROMA_SIZE = 16; static const SplitSeries SPLIT_BITS = 5; static const SplitSeries SPLIT_DMULT = 5; static const SplitSeries SPLIT_MASK = 31; ///< = (1 << SPLIT_BITS) - 1 @@ -382,11 +412,6 @@ static const int MAX_TESTED_QPs = ( 1 + 1 + ( MAX_DELTA_QP << 1 ) ); ///< static const int COM16_C806_TRANS_PREC = 0; -static const int NUM_MERGE_IDX_EXT_CTX = 5; -static const unsigned E0104_ALF_MAX_TEMPLAYERID = 5; // define to zero to switch of code -static const unsigned C806_ALF_TEMPPRED_NUM = 6; - - static const int NTAPS_LUMA = 8; ///< Number of taps for luma static const int NTAPS_CHROMA = 4; ///< Number of taps for chroma #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET @@ -396,9 +421,9 @@ static const int MAX_LADF_INTERVALS = 5; /// max n static const int NTAPS_BILINEAR = 2; ///< 
Number of taps for bilinear filter static const int ATMVP_SUB_BLOCK_SIZE = 3; ///< sub-block size for ATMVP -static const int TRIANGLE_MAX_NUM_UNI_CANDS = 5; +static const int TRIANGLE_MAX_NUM_UNI_CANDS = 6; static const int TRIANGLE_MAX_NUM_CANDS_MEM = 7; -static const int TRIANGLE_MAX_NUM_CANDS = 40; +static const int TRIANGLE_MAX_NUM_CANDS = TRIANGLE_MAX_NUM_UNI_CANDS * (TRIANGLE_MAX_NUM_UNI_CANDS - 1) * 2; static const int TRIANGLE_MAX_NUM_SATD_CANDS = 3; static const int TRIANGLE_MIN_SIZE = 8 * 8; @@ -406,6 +431,11 @@ static const int SBT_MAX_SIZE = 64; ///< maxi static const int SBT_NUM_SL = 10; ///< maximum number of historical PU decision saved for a CU static const int SBT_NUM_RDO = 2; ///< maximum number of SBT mode tried for a PU +static const int NUM_INTER_CU_INFO_SAVE = 8; ///< maximum number of inter cu information saved for fast algorithm +static const int LDT_MODE_TYPE_INHERIT = 0; ///< No need to signal mode_constraint_flag, and the modeType of the region is inherited from its parent node +static const int LDT_MODE_TYPE_INFER = 1; ///< No need to signal mode_constraint_flag, and the modeType of the region is inferred as MODE_TYPE_INTRA +static const int LDT_MODE_TYPE_SIGNAL = 2; ///< Need to signal mode_constraint_flag, and the modeType of the region is determined by the flag + static const int IBC_MAX_CAND_SIZE = 16; // max block size for ibc search static const int IBC_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test static const int CHROMA_REFINEMENT_CANDIDATES = 8; /// 8 candidates BV to choose from @@ -417,10 +447,41 @@ static constexpr int MV_MANTISSA_BITCOUNT = 6; static constexpr int MV_MANTISSA_UPPER_LIMIT = ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1); static constexpr int MV_MANTISSA_LIMIT = (1 << (MV_MANTISSA_BITCOUNT - 1)); static constexpr int MV_EXPONENT_MASK = ((1 << MV_EXPONENT_BITCOUNT) - 1); + +static constexpr int MV_BITS = 18; +static constexpr int MV_MAX = (1 << (MV_BITS - 1)) - 1; +static constexpr int MV_MIN = 
-(1 << (MV_BITS - 1)); + +static const int MVD_MAX = (1 << 17) - 1; +static const int MVD_MIN = -(1 << 17); + static const int PIC_ANALYZE_CW_BINS = 32; static const int PIC_CODE_CW_BINS = 16; -static const int FP_PREC = 14; +static const int LMCS_SEG_NUM = 32; +static const int FP_PREC = 11; static const int CSCALE_FP_PREC = 11; +static const int NEIG_NUM_LOG = 6; +static const int NEIG_NUM = 1 << NEIG_NUM_LOG; +static const int LOG2_PALETTE_CG_SIZE = 4; +static const int RUN_IDX_THRE = 4; +static const int MAX_CU_BLKSIZE_PLT = 64; +static const int NUM_TRELLIS_STATE = 3; +static const double ENC_CHROMA_WEIGHTING = 0.8; +static const int MAXPLTPREDSIZE = 63; +static const int MAXPLTSIZE = 31; +static const double PLT_CHROMA_WEIGHTING = 0.8; +static const int PLT_ENCBITDEPTH = 8; +static const int PLT_FAST_RATIO = 100; +#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS +static const int EPBIN_WEIGHT_FACTOR = 4; +#endif +static const int ENC_PPS_ID_RPR = 3; +static const int SCALE_RATIO_BITS = 14; +static const int MAX_SCALING_RATIO = 2; // max downsampling ratio for RPR +static const std::pair<int, int> SCALE_1X = std::pair<int, int>( 1 << SCALE_RATIO_BITS, 1 << SCALE_RATIO_BITS ); // scale ratio 1x +static const int DELTA_QP_FOR_Y_Cg = -5; +static const int DELTA_QP_FOR_Co = -3; + // ==================================================================================================================== // Macro functions // ==================================================================================================================== @@ -566,10 +627,16 @@ static inline int floorLog2(uint32_t x) { if (x == 0) { + // note: ceilLog2() expects -1 as return value return -1; } #ifdef __GNUC__ return 31 - __builtin_clz(x); +#else +#ifdef _MSC_VER + unsigned long r = 0; + _BitScanReverse(&r, x); + return r; #else int result = 0; if (x & 0xffff0000) @@ -599,8 +666,15 @@ static inline int floorLog2(uint32_t x) } return result; #endif +#endif } +static inline int 
ceilLog2(uint32_t x) +{ + return (x==0) ? -1 : floorLog2(x - 1) + 1; +} + + //CASE-BREAK for breakpoints #if defined ( _MSC_VER ) && defined ( _DEBUG ) #define _CASE(_x) if(_x) @@ -616,7 +690,7 @@ static inline int floorLog2(uint32_t x) #define _UNIT_AREA_AT(_a,_x,_y,_w,_h) #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM #include <omp.h> #define PARL_PARAM(DEF) , DEF diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index 6e64f1b2f727f234d743dbe65ea691f1eb553004..dd8f0d97f69cfb07adc86d77787a93377a91f2b2 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -41,35 +41,25 @@ #include "Picture.h" -#if HEVC_USE_SIGN_HIDING -CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID component, bool signHide) -#else -CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID component ) -#endif +CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, bool bdpcm ) : m_compID (component) , m_chType (toChannelType(m_compID)) , m_width (tu.block(m_compID).width) , m_height (tu.block(m_compID).height) - , m_log2CGWidth ( g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][0] ) - , m_log2CGHeight ( g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][1] ) + , m_log2CGWidth ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][0] ) + , m_log2CGHeight ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][1] ) , m_log2CGSize (m_log2CGWidth + m_log2CGHeight) , m_widthInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) >> m_log2CGWidth) , m_heightInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_height) >> m_log2CGHeight) - , m_log2BlockWidth (g_aucLog2[m_width]) - , m_log2BlockHeight (g_aucLog2[m_height]) + , m_log2BlockWidth ((unsigned)floorLog2(m_width)) + , m_log2BlockHeight 
((unsigned)floorLog2(m_height)) , m_maxNumCoeff (m_width * m_height) -#if HEVC_USE_SIGN_HIDING , m_signHiding (signHide) -#endif , m_extendedPrecision (tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) , m_maxLog2TrDynamicRange (tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)) -#if HEVC_USE_MDCS - , m_scanType (CoeffScanType(TU::getCoefScanIdx( tu, m_compID))) -#else , m_scanType (SCAN_DIAG) -#endif - , m_scan (g_scanOrder [m_chType][SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width )][gp_sizeIdxInfo->idxFrom(m_height )]) - , m_scanCG (g_scanOrder [m_chType][SCAN_UNGROUPED ][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)][gp_sizeIdxInfo->idxFrom(m_heightInGroups)]) + , m_scan (g_scanOrder [SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width )][gp_sizeIdxInfo->idxFrom(m_height )]) + , m_scanCG (g_scanOrder [SCAN_UNGROUPED ][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)][gp_sizeIdxInfo->idxFrom(m_heightInGroups)]) , m_CtxSetLastX (Ctx::LastX[m_chType]) , m_CtxSetLastY (Ctx::LastY[m_chType]) , m_maxLastPosX(g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) - 1]) @@ -78,7 +68,11 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp , m_lastOffsetY (0) , m_lastShiftX (0) , m_lastShiftY (0) - , m_TrafoBypass (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.mtsIdx==1)) +#if JVET_P0058_CHROMA_TS + , m_TrafoBypass (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.mtsIdx[m_compID] == MTS_SKIP)) +#else + , m_TrafoBypass (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.mtsIdx==MTS_SKIP)) +#endif , m_scanPosLast (-1) , m_subSetId (-1) , m_subSetPos (-1) @@ -92,27 +86,22 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp , m_sigFlagCtxSet { Ctx::SigFlag[m_chType], 
Ctx::SigFlag[m_chType+2], Ctx::SigFlag[m_chType+4] } , m_parFlagCtxSet ( Ctx::ParFlag[m_chType] ) , m_gtxFlagCtxSet { Ctx::GtxFlag[m_chType], Ctx::GtxFlag[m_chType+2] } + , m_sigGroupCtxIdTS (-1) + , m_tsSigFlagCtxSet ( Ctx::TsSigFlag ) + , m_tsParFlagCtxSet ( Ctx::TsParFlag ) + , m_tsGtxFlagCtxSet ( Ctx::TsGtxFlag ) + , m_tsLrg1FlagCtxSet (Ctx::TsLrg1Flag) + , m_tsSignFlagCtxSet (Ctx::TsResidualSign) , m_sigCoeffGroupFlag () + , m_bdpcm (bdpcm) { // LOGTODO unsigned log2sizeX = m_log2BlockWidth; unsigned log2sizeY = m_log2BlockHeight; -#if HEVC_USE_MDCS - if (m_scanType == SCAN_VER) - { - std::swap(log2sizeX, log2sizeY); - std::swap(const_cast<unsigned&>(m_maxLastPosX), const_cast<unsigned&>(m_maxLastPosY)); - } -#endif if (m_chType == CHANNEL_TYPE_CHROMA) { -#if HEVC_USE_MDCS - const_cast<int&>(m_lastShiftX) = Clip3( 0, 2, int( ( m_scanType == SCAN_VER ? m_height : m_width ) >> 3) ); - const_cast<int&>(m_lastShiftY) = Clip3( 0, 2, int( ( m_scanType == SCAN_VER ? m_width : m_height ) >> 3) ); -#else const_cast<int&>(m_lastShiftX) = Clip3( 0, 2, int( m_width >> 3) ); const_cast<int&>(m_lastShiftY) = Clip3( 0, 2, int( m_height >> 3) ); -#endif } else { @@ -141,33 +130,38 @@ void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag ) unsigned sigRight = unsigned( ( CGPosX + 1 ) < m_widthInGroups ? m_sigCoeffGroupFlag[ m_subSetPos + 1 ] : false ); unsigned sigLower = unsigned( ( CGPosY + 1 ) < m_heightInGroups ? m_sigCoeffGroupFlag[ m_subSetPos + m_widthInGroups ] : false ); m_sigGroupCtxId = Ctx::SigCoeffGroup[m_chType]( sigRight | sigLower ); + unsigned sigLeft = unsigned( CGPosX > 0 ? m_sigCoeffGroupFlag[m_subSetPos - 1 ] : false ); + unsigned sigAbove = unsigned( CGPosY > 0 ? 
m_sigCoeffGroupFlag[m_subSetPos - m_widthInGroups] : false ); + m_sigGroupCtxIdTS = Ctx::TsSigCoeffGroup( sigLeft + sigAbove ); } +unsigned DeriveCtx::CtxModeConsFlag( const CodingStructure& cs, Partitioner& partitioner ) +{ + assert( partitioner.chType == CHANNEL_TYPE_LUMA ); + const Position pos = partitioner.currArea().blocks[partitioner.chType]; + const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx(); + const unsigned curTileIdx = cs.picture->brickMap->getBrickIdxRsMap( partitioner.currArea().lumaPos() ); + const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), pos, curSliceIdx, curTileIdx, partitioner.chType ); + const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), pos, curSliceIdx, curTileIdx, partitioner.chType ); + + unsigned ctxId = ((cuAbove && cuAbove->predMode == MODE_INTRA) || (cuLeft && cuLeft->predMode == MODE_INTRA)) ? 1 : 0; + return ctxId; +} void DeriveCtx::CtxSplit( const CodingStructure& cs, Partitioner& partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* _canSplit /*= nullptr */ ) { const Position pos = partitioner.currArea().blocks[partitioner.chType]; const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx(); -#if HEVC_TILES_WPP - const unsigned curTileIdx = cs.picture->tileMap->getTileIdxMap( partitioner.currArea().lumaPos() ); -#endif + const unsigned curTileIdx = cs.picture->brickMap->getBrickIdxRsMap( partitioner.currArea().lumaPos() ); // get left depth -#if HEVC_TILES_WPP - const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, curTileIdx, partitioner.chType ); -#else - const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, partitioner.chType ); -#endif + const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), pos, curSliceIdx, curTileIdx, partitioner.chType ); // get above depth -#if HEVC_TILES_WPP - const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), 
curSliceIdx, curTileIdx, partitioner.chType ); -#else - const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, partitioner.chType ); -#endif + const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), pos, curSliceIdx, curTileIdx, partitioner.chType ); bool canSplit[6]; @@ -255,29 +249,22 @@ void DeriveCtx::CtxSplit( const CodingStructure& cs, Partitioner& partitioner, u ctxVerBt = ( partitioner.currMtDepth <= 1 ? 3 : 2 ); } -unsigned DeriveCtx::CtxQtCbf( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf, const int ispIdx ) +unsigned DeriveCtx::CtxQtCbf( const ComponentID compID, const bool prevCbf, const int ispIdx ) { if( ispIdx && isLuma( compID ) ) { - return 2 + (int)prevCbCbf; + return 2 + (int)prevCbf; } if( compID == COMPONENT_Cr ) { - return ( prevCbCbf ? 1 : 0 ); - } - if( isChroma( compID ) ) - { - return trDepth; - } - else - { - return ( trDepth == 0 ? 1 : 0 ); + return ( prevCbf ? 1 : 0 ); } + return 0; } unsigned DeriveCtx::CtxInterDir( const PredictionUnit& pu ) { - return Clip3( 0, 3, 7 - ( ( g_aucLog2[pu.lumaSize().width] + g_aucLog2[pu.lumaSize().height] + 1 ) >> 1 ) ); // VG-ASYMM DONE + return ( 7 - ((floorLog2(pu.lumaSize().width) + floorLog2(pu.lumaSize().height) + 1) >> 1) ); } unsigned DeriveCtx::CtxAffineFlag( const CodingUnit& cu ) @@ -310,35 +297,6 @@ unsigned DeriveCtx::CtxSkipFlag( const CodingUnit& cu ) } -unsigned DeriveCtx::CtxIMVFlag( const CodingUnit& cu ) -{ - const CodingStructure *cs = cu.cs; - unsigned ctxId = 0; - - // Get BCBP of left PU - const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L ); - ctxId = ( cuLeft && cuLeft->imv ) ? 1 : 0; - - // Get BCBP of above PU - const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L ); - ctxId += ( cuAbove && cuAbove->imv ) ? 
1 : 0; - - return ctxId; -} - -unsigned DeriveCtx::CtxTriangleFlag( const CodingUnit& cu ) -{ - const CodingStructure *cs = cu.cs; - unsigned ctxId = 0; - - const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L ); - ctxId = ( cuLeft && cuLeft->triangle ) ? 1 : 0; - - const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L ); - ctxId += ( cuAbove && cuAbove->triangle ) ? 1 : 0; - - return ctxId; -} unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu ) { @@ -354,10 +312,11 @@ unsigned DeriveCtx::CtxIBCFlag(const CodingUnit& cu) { const CodingStructure *cs = cu.cs; unsigned ctxId = 0; - const CodingUnit *cuLeft = cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); + const Position pos = cu.chType == CHANNEL_TYPE_CHROMA ? cu.chromaPos() : cu.lumaPos(); + const CodingUnit *cuLeft = cs->getCURestricted(pos.offset(-1, 0), cu, cu.chType); ctxId += (cuLeft && CU::isIBC(*cuLeft)) ? 1 : 0; - const CodingUnit *cuAbove = cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L); + const CodingUnit *cuAbove = cs->getCURestricted(pos.offset(0, -1), cu, cu.chType); ctxId += (cuAbove && CU::isIBC(*cuAbove)) ? 1 : 0; return ctxId; } @@ -365,10 +324,11 @@ unsigned DeriveCtx::CtxIBCFlag(const CodingUnit& cu) void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) { CHECK( candIdx >= numValidMergeCand, "Merge candidate does not exist" ); - + pu.regularMergeFlag = !(pu.mhIntraFlag || pu.cu->triangle); pu.mergeFlag = true; pu.mmvdMergeFlag = false; pu.interDir = interDirNeighbours[candIdx]; + pu.cu->imv = (!pu.cu->triangle && useAltHpelIf[candIdx]) ? 
IMV_HPEL : 0; pu.mergeIdx = candIdx; pu.mergeType = mrgTypeNeighbours[candIdx]; pu.mv [REF_PIC_LIST_0] = mvFieldNeighbours[(candIdx << 1) + 0].mv; @@ -385,6 +345,7 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx ) { pu.bv = pu.mv[REF_PIC_LIST_0]; pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); // used for only integer resolution + pu.cu->imv = pu.cu->imv == IMV_HPEL ? 0 : pu.cu->imv; } pu.cu->GBiIdx = ( interDirNeighbours[candIdx] == 3 ) ? GBiIdx[candIdx] : GBI_DEFAULT; @@ -447,10 +408,9 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) { const int scale = PU::getDistScaleFactor(currPoc, poc0, currPoc, poc1); tempMv[1] = tempMv[0]; -#if MMVD_LTRP - const bool bIsL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm; - const bool bIsL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm; - if (bIsL0RefLongTerm || bIsL1RefLongTerm) + const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm; + const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm; + if (isL0RefLongTerm || isL1RefLongTerm) { if ((poc1 - currPoc)*(poc0 - currPoc) > 0) { @@ -462,16 +422,14 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) } } else -#endif tempMv[0] = tempMv[1].scaleMv(scale); } else { const int scale = PU::getDistScaleFactor(currPoc, poc1, currPoc, poc0); -#if MMVD_LTRP - const bool bIsL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm; - const bool bIsL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm; - if (bIsL0RefLongTerm || bIsL1RefLongTerm) + const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm; + const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm; + if (isL0RefLongTerm || isL1RefLongTerm) { if ((poc1 - currPoc)*(poc0 - currPoc) > 0) { @@ -483,7 +441,6 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) } } else 
-#endif tempMv[1] = tempMv[0].scaleMv(scale); } @@ -545,6 +502,7 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) pu.mmvdMergeFlag = true; pu.mmvdMergeIdx = candIdx; pu.mergeFlag = true; + pu.regularMergeFlag = true; pu.mergeIdx = candIdx; pu.mergeType = MRG_TYPE_DEFAULT_N; pu.mvd[REF_PIC_LIST_0] = Mv(); @@ -553,8 +511,50 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx) pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; + pu.cu->imv = mmvdUseAltHpelIf[fPosBaseIdx] ? IMV_HPEL : 0; pu.cu->GBiIdx = (interDirNeighbours[fPosBaseIdx] == 3) ? GBiIdx[fPosBaseIdx] : GBI_DEFAULT; + for (int refList = 0; refList < 2; refList++) + { + if (pu.refIdx[refList] >= 0) + { + pu.mv[refList].clipToStorageBitDepth(); + } + } + + PU::restrictBiPredMergeCandsOne(pu); } + +unsigned DeriveCtx::CtxMipFlag( const CodingUnit& cu ) +{ + const CodingStructure *cs = cu.cs; + unsigned ctxId = 0; + + const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L ); + ctxId = (cuLeft && cuLeft->mipFlag) ? 1 : 0; + + const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L ); + ctxId += (cuAbove && cuAbove->mipFlag) ? 1 : 0; + + ctxId = (cu.lwidth() > 2*cu.lheight() || cu.lheight() > 2*cu.lwidth()) ? 3 : ctxId; + + return ctxId; +} + +#if JVET_P0077_LINE_CG_PALETTE +unsigned DeriveCtx::CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist ) +{ + uint8_t *ucCtxLut = (prevRunType == PLT_RUN_INDEX) ? 
g_paletteRunLeftLut : g_paletteRunTopLut; + if ( dist <= RUN_IDX_THRE ) + { + return ucCtxLut[dist]; + } + else + { + return ucCtxLut[RUN_IDX_THRE]; + } +} +#endif + diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index ea58093d5ba0b6ec9b246dfa93f22fb8dbc7754a..ef1df8aed4967d581822b1729a85e5ddab2f853e 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,16 +51,16 @@ struct CoeffCodingContext { public: -#if HEVC_USE_SIGN_HIDING - CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide); -#else - CoeffCodingContext( const TransformUnit& tu, ComponentID component ); -#endif + CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, bool bdpcm = false ); public: void initSubblock ( int SubsetId, bool sigGroupFlag = false ); public: void resetSigGroup () { m_sigCoeffGroupFlag.reset( m_subSetPos ); } void setSigGroup () { m_sigCoeffGroupFlag.set( m_subSetPos ); } + bool noneSigGroup () { return m_sigCoeffGroupFlag.none(); } + int lastSubSet () { return ( maxNumCoeff() - 1 ) >> log2CGSize(); } + bool isLastSubSet () { return lastSubSet() == m_subSetId; } + bool only1stSigGroup () { return m_sigCoeffGroupFlag.count()-m_sigCoeffGroupFlag[lastSubSet()]==0; } void setScanPosLast ( int posLast ) { m_scanPosLast = posLast; } public: ComponentID compID () const { return m_compID; } @@ -83,11 +83,9 @@ public: bool isNotFirst () const { return ( m_subSetId != 0 ); } bool isSigGroup(int scanPosCG) const { return m_sigCoeffGroupFlag[m_scanCG[scanPosCG].idx]; } bool isSigGroup () const { return m_sigCoeffGroupFlag[ m_subSetPos ]; } -#if 
HEVC_USE_SIGN_HIDING bool signHiding () const { return m_signHiding; } bool hideSign ( int posFirst, int posLast ) const { return ( m_signHiding && ( posLast - posFirst >= SBH_THRESHOLD ) ); } -#endif CoeffScanType scanType () const { return m_scanType; } unsigned blockPos(int scanPos) const { return m_scan[scanPos].idx; } unsigned posX(int scanPos) const { return m_scan[scanPos].x; } @@ -96,7 +94,13 @@ public: unsigned maxLastPosY () const { return m_maxLastPosY; } unsigned lastXCtxId ( unsigned posLastX ) const { return m_CtxSetLastX( m_lastOffsetX + ( posLastX >> m_lastShiftX ) ); } unsigned lastYCtxId ( unsigned posLastY ) const { return m_CtxSetLastY( m_lastOffsetY + ( posLastY >> m_lastShiftY ) ); } - unsigned sigGroupCtxId () const { return m_sigGroupCtxId; } + int numCtxBins () const { return m_remainingContextBins; } + void setNumCtxBins ( int n ) { m_remainingContextBins = n; } + unsigned sigGroupCtxId ( bool ts = false ) const { return ts ? m_sigGroupCtxIdTS : m_sigGroupCtxId; } + bool bdpcm () const { return m_bdpcm; } + + void decimateNumCtxBins(int n) { m_remainingContextBins -= n; } + void increaseNumCtxBins(int n) { m_remainingContextBins += n; } unsigned sigCtxIdAbs( int scanPos, const TCoeff* coeff, const int state ) { @@ -128,11 +132,15 @@ public: } } #undef UPDATE - int ctxOfs = std::min( sumAbs, 5 ) + ( diag < 2 ? 6 : 0 ); + + + int ctxOfs = std::min((sumAbs+1)>>1, 3) + ( diag < 2 ? 4 : 0 ); + if( m_chType == CHANNEL_TYPE_LUMA ) { - ctxOfs += diag < 5 ? 6 : 0; + ctxOfs += diag < 5 ? 
4 : 0; } + m_tmplCpDiag = diag; m_tmplCpSum1 = sumAbs - numPos; return m_sigFlagCtxSet[std::max( 0, state-1 )]( ctxOfs ); @@ -152,8 +160,7 @@ public: unsigned parityCtxIdAbs ( uint8_t offset ) const { return m_parFlagCtxSet ( offset ); } unsigned greater1CtxIdAbs ( uint8_t offset ) const { return m_gtxFlagCtxSet[1]( offset ); } unsigned greater2CtxIdAbs ( uint8_t offset ) const { return m_gtxFlagCtxSet[0]( offset ); } - - unsigned templateAbsSum( int scanPos, const TCoeff* coeff ) + unsigned templateAbsSum( int scanPos, const TCoeff* coeff, int baseLevel ) { const uint32_t posY = m_scan[scanPos].y; const uint32_t posX = m_scan[scanPos].x; @@ -179,9 +186,170 @@ public: sum += abs(pData[m_width << 1]); } } - return std::min(sum, 31); + return std::max(std::min(sum - 5 * baseLevel, 31), 0); + } + + unsigned sigCtxIdAbsTS( int scanPos, const TCoeff* coeff ) + { + const uint32_t posY = m_scan[scanPos].y; + const uint32_t posX = m_scan[scanPos].x; + const TCoeff* posC = coeff + posX + posY * m_width; + int numPos = 0; +#define UPDATE(x) {int a=abs(x);numPos+=!!a;} + if( posX > 0 ) + { + UPDATE( posC[-1] ); + } + if( posY > 0 ) + { + UPDATE( posC[-(int)m_width] ); + } +#undef UPDATE + + return m_tsSigFlagCtxSet( numPos ); + } + + unsigned parityCtxIdAbsTS () const { return m_tsParFlagCtxSet( 0 ); } + unsigned greaterXCtxIdAbsTS ( uint8_t offset ) const { return m_tsGtxFlagCtxSet( offset ); } + + unsigned lrg1CtxIdAbsTS(int scanPos, const TCoeff* coeff, int bdpcm) + { + const uint32_t posY = m_scan[scanPos].y; + const uint32_t posX = m_scan[scanPos].x; + const TCoeff* posC = coeff + posX + posY * m_width; + + int numPos = 0; +#define UPDATE(x) {int a=abs(x);numPos+=!!a;} + + if (bdpcm) + { + numPos = 3; + } + else + { + if (posX > 0) + { + UPDATE(posC[-1]); + } + if (posY > 0) + { + UPDATE(posC[-(int)m_width]); + } + } + +#undef UPDATE + return m_tsLrg1FlagCtxSet(numPos); + } + + unsigned signCtxIdAbsTS(int scanPos, const TCoeff* coeff, int bdpcm) + { + const uint32_t posY 
= m_scan[scanPos].y; + const uint32_t posX = m_scan[scanPos].x; + const TCoeff* pData = coeff + posX + posY * m_width; + + int rightSign = 0, belowSign = 0; + unsigned signCtx = 0; + + if (posX > 0) + { + rightSign = pData[-1]; + } + if (posY > 0) + { + belowSign = pData[-(int)m_width]; + } + + if ((rightSign == 0 && belowSign == 0) || ((rightSign*belowSign) < 0)) + { + signCtx = 0; + } + else if (rightSign >= 0 && belowSign >= 0) + { + signCtx = 1; + } + else + { + signCtx = 2; + } + if (bdpcm) + { + signCtx += 3; + } + return m_tsSignFlagCtxSet(signCtx); + } + + void neighTS(int &rightPixel, int &belowPixel, int scanPos, const TCoeff* coeff) + { + const uint32_t posY = m_scan[scanPos].y; + const uint32_t posX = m_scan[scanPos].x; + const TCoeff* data = coeff + posX + posY * m_width; + + rightPixel = belowPixel = 0; + + if (posX > 0) + { + rightPixel = data[-1]; + } + if (posY > 0) + { + belowPixel = data[-(int)m_width]; + } + } + + int deriveModCoeff(int rightPixel, int belowPixel, int absCoeff, int bdpcm = 0) + { + + if (absCoeff == 0) + return 0; + int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel); + + int absCoeffMod = absCoeff; + + if (bdpcm == 0) + { + pred1 = std::max(absBelow, absRight); + + if (absCoeff == pred1) + { + absCoeffMod = 1; + } + else + { + absCoeffMod = absCoeff < pred1 ? 
absCoeff + 1 : absCoeff; + } + } + + return(absCoeffMod); } + int decDeriveModCoeff(int rightPixel, int belowPixel, int absCoeff) + { + + if (absCoeff == 0) + return 0; + + int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel); + pred1 = std::max(absBelow, absRight); + + int absCoeffMod; + + if (absCoeff == 1 && pred1 > 0) + { + absCoeffMod = pred1; + } + else + { + absCoeffMod = absCoeff - (absCoeff <= pred1); + } + return(absCoeffMod); + } + + unsigned templateAbsSumTS( int scanPos, const TCoeff* coeff ) + { + return 1; + } + + int regBinLimit; private: // constant @@ -197,9 +365,7 @@ private: const unsigned m_log2BlockWidth; const unsigned m_log2BlockHeight; const unsigned m_maxNumCoeff; -#if HEVC_USE_SIGN_HIDING const bool m_signHiding; -#endif const bool m_extendedPrecision; const int m_maxLog2TrDynamicRange; CoeffScanType m_scanType; @@ -228,7 +394,15 @@ private: CtxSet m_sigFlagCtxSet[3]; CtxSet m_parFlagCtxSet; CtxSet m_gtxFlagCtxSet[2]; + unsigned m_sigGroupCtxIdTS; + CtxSet m_tsSigFlagCtxSet; + CtxSet m_tsParFlagCtxSet; + CtxSet m_tsGtxFlagCtxSet; + CtxSet m_tsLrg1FlagCtxSet; + CtxSet m_tsSignFlagCtxSet; + int m_remainingContextBins; std::bitset<MLS_GRP_NUM> m_sigCoeffGroupFlag; + const bool m_bdpcm; }; @@ -236,18 +410,31 @@ class CUCtx { public: CUCtx() : isDQPCoded(false), isChromaQpAdjCoded(false), - qgStart(false), - numNonZeroCoeffNonTs(0) {} + qgStart(false) + { + violatesLfnstConstrained[CHANNEL_TYPE_LUMA ] = false; + violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; + lfnstLastScanPos = false; + violatesMtsCoeffConstraint = false; + } CUCtx(int _qp) : isDQPCoded(false), isChromaQpAdjCoded(false), qgStart(false), - numNonZeroCoeffNonTs(0), qp(_qp) {} + qp(_qp) + { + violatesLfnstConstrained[CHANNEL_TYPE_LUMA ] = false; + violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; + lfnstLastScanPos = false; + violatesMtsCoeffConstraint = false; + } ~CUCtx() {} public: bool isDQPCoded; bool isChromaQpAdjCoded; bool qgStart; - uint32_t 
numNonZeroCoeffNonTs; - int8_t qp; // used as a previous(last) QP and for QP prediction + bool lfnstLastScanPos; + int8_t qp; // used as a previous(last) QP and for QP prediction + bool violatesLfnstConstrained[MAX_NUM_CHANNEL_TYPE]; + bool violatesMtsCoeffConstraint; }; class MergeCtx @@ -257,7 +444,7 @@ public: ~MergeCtx() {} public: MvField mvFieldNeighbours [ MRG_MAX_NUM_CANDS << 1 ]; // double length for mv of both lists - uint8_t GBiIdx [ MRG_MAX_NUM_CANDS ]; + uint8_t BcwIdx [ MRG_MAX_NUM_CANDS ]; unsigned char interDirNeighbours[ MRG_MAX_NUM_CANDS ]; MergeType mrgTypeNeighbours [ MRG_MAX_NUM_CANDS ]; int numValidMergeCand; @@ -267,6 +454,8 @@ public: MotionBuf subPuMvpExtMiBuf; MvField mmvdBaseMv[MMVD_BASE_MV_NUM][2]; void setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx); + bool mmvdUseAltHpelIf [ MMVD_BASE_MV_NUM ]; + bool useAltHpelIf [ MRG_MAX_NUM_CANDS ]; void setMergeInfo( PredictionUnit& pu, int candIdx ); }; @@ -279,7 +468,7 @@ public: MvField mvFieldNeighbours[AFFINE_MRG_MAX_NUM_CANDS << 1][3]; // double length for mv of both lists unsigned char interDirNeighbours[AFFINE_MRG_MAX_NUM_CANDS]; EAffineModel affineType[AFFINE_MRG_MAX_NUM_CANDS]; - uint8_t GBiIdx[AFFINE_MRG_MAX_NUM_CANDS]; + uint8_t BcwIdx[AFFINE_MRG_MAX_NUM_CANDS]; int numValidMergeCand; int maxNumMergeCand; @@ -291,14 +480,15 @@ public: namespace DeriveCtx { void CtxSplit ( const CodingStructure& cs, Partitioner& partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* canSplit = nullptr ); -unsigned CtxQtCbf ( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf = false, const int ispIdx = 0 ); +unsigned CtxModeConsFlag( const CodingStructure& cs, Partitioner& partitioner ); +unsigned CtxQtCbf ( const ComponentID compID, const bool prevCbf = false, const int ispIdx = 0 ); unsigned CtxInterDir ( const PredictionUnit& pu ); unsigned CtxSkipFlag ( const CodingUnit& cu ); -unsigned CtxIMVFlag ( const CodingUnit& 
cu ); unsigned CtxAffineFlag( const CodingUnit& cu ); -unsigned CtxTriangleFlag( const CodingUnit& cu ); unsigned CtxPredModeFlag( const CodingUnit& cu ); unsigned CtxIBCFlag(const CodingUnit& cu); +unsigned CtxMipFlag ( const CodingUnit& cu ); +unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist ); } #endif // __CONTEXTMODELLING__ diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index dedb921d422e4b6c6e0f9be24b40316af664802d..749b8673911c7b3ddbd2404daa1bcc53481c306c 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -120,24 +120,13 @@ const BinFracBits ProbModelTables::m_binFracBits[256] = { { { 0x28beb, 0x0057e } }, { { 0x2a658, 0x004c0 } }, { { 0x2c531, 0x00403 } }, { { 0x2ea40, 0x00346 } }, { { 0x318a9, 0x0028b } }, { { 0x356cb, 0x001d0 } }, { { 0x3b520, 0x00116 } }, { { 0x48000, 0x0005c } }, }; - -const uint16_t ProbModelTables::m_inistateToCount[128] = { - 614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342, - 1414, 1490, 1569, 1653, 1742, 1835, 1933, 2037, 2146, 2261, 2382, 2509, 2643, 2785, 2934, 3091, - 3256, 3430, 3614, 3807, 4011, 4225, 4452, 4690, 4941, 5205, 5483, 5777, 6086, 6412, 6755, 7116, - 7497, 7898, 8320, 8766, 9235, 9729, 10249, 10798, 11375, 11984, 12625, 13300, 14012, 14762, 15551, 16384, - 16384, 17216, 18005, 18755, 19467, 20142, 20783, 21392, 21969, 22518, 23038, 23532, 24001, 24447, 24869, 25270, - 25651, 26012, 26355, 26681, 26990, 27284, 27562, 27826, 28077, 28315, 28542, 28756, 28960, 29153, 29337, 29511, - 29676, 29833, 29982, 30124, 30258, 30385, 30506, 30621, 30730, 30834, 30932, 31025, 31114, 31198, 31277, 31353, 
- 31425, 31493, 31558, 31619, 31678, 31733, 31785, 31835, 31883, 31928, 31970, 32011, 32049, 32086, 32120, 32153 -}; - void BinProbModel_Std::init( int qp, int initId ) { - int slope = ( ( initId >> 4 ) * 5 ) - 45; - int offset = ( ( initId & 15 ) << 3 ) - 16; - int inistate = ( ( slope * qp ) >> 4 ) + offset; - const int p1 = m_inistateToCount[inistate < 0 ? 0 : inistate > 127 ? 127 : inistate]; + int slope = (initId >> 3) - 4; + int offset = ((initId & 7) * 18) + 1; + int inistate = ((slope * (qp - 16)) >> 1) + offset; + int state_clip = inistate < 1 ? 1 : inistate > 127 ? 127 : inistate; + const int p1 = (state_clip << 8); m_state[0] = p1 & MASK_0; m_state[1] = p1 & MASK_1; } @@ -192,592 +181,731 @@ CtxSet ContextSetCfg::addCtxSet( std::initializer_list<std::initializer_list<uin } - -#define CNU 154 // dummy initialization value for unused context models 'Context model Not Used' +#define CNU 35 std::vector<std::vector<uint8_t>> ContextSetCfg::sm_InitTables(NUMBER_OF_SLICE_TYPES + 1); // clang-format off const CtxSet ContextSetCfg::SplitFlag = ContextSetCfg::addCtxSet ({ - // |-------- do split ctx -------------------| - { 122, 124, 141, 108, 125, 156, 138, 126, 143, }, - { 93, 139, 171, 124, 125, 141, 139, 141, 158, }, - { 138, 154, 172, 124, 140, 142, 154, 127, 175, }, - { 9, 13, 8, 8, 13, 12, 5, 10, 12, }, + { 18, 27, 15, 18, 28, 30, 19, 7, 23, }, + { 11, 35, 53, 12, 6, 30, 13, 15, 31, }, + { 19, 28, 38, 27, 29, 38, 28, 38, 31, }, + { 12, 13, 8, 8, 13, 12, 5, 9, 9, }, }); const CtxSet ContextSetCfg::SplitQtFlag = ContextSetCfg::addCtxSet ({ - { 138, 140, 142, 136, 138, 140, }, - { 139, 126, 142, 107, 138, 125, }, - { 139, 125, 127, 136, 153, 126, }, - { 0, 8, 8, 12, 12, 8, }, + { 26, 36, 38, 33, 34, 21, }, + { 20, 14, 23, 18, 19, 6, }, + { 27, 6, 15, 25, 19, 22, }, + { 0, 8, 8, 12, 12, 9, }, }); const CtxSet ContextSetCfg::SplitHvFlag = ContextSetCfg::addCtxSet ({ - { 154, 168, 155, 139, 155, }, - { 169, 168, 170, 153, 170, }, - { 154, 168, 140, 153, 169, }, - 
{ 10, 9, 9, 8, 8, }, + { 43, 42, 37, 35, 44, }, + { 36, 35, 37, 27, 52, }, + { 43, 42, 29, 27, 44, }, + { 9, 8, 9, 8, 8, }, }); const CtxSet ContextSetCfg::Split12Flag = ContextSetCfg::addCtxSet ({ - { 154, 140, 154, 140, }, - { 169, 155, 154, 140, }, - { 154, 170, 154, 170, }, - { 12, 12, 12, 12, }, + { 28, 29, 28, 29, }, + { 43, 37, 21, 22, }, + { 51, 45, 36, 45, }, + { 12, 13, 12, 13, }, +}); + +const CtxSet ContextSetCfg::ModeConsFlag = ContextSetCfg::addCtxSet +({ + { 25, 20, }, + { 25, 12, }, + { CNU, CNU, }, + { 1, 0, }, }); const CtxSet ContextSetCfg::SkipFlag = ContextSetCfg::addCtxSet ({ - { 197, 214, 216, }, - { 197, 198, 185, }, - { 40, 138, 154, }, - { 5, 8, 8, }, + { 57, 60, 53, }, + { 57, 59, 45, }, + { 0, 26, 28, }, + { 5, 4, 8, }, }); const CtxSet ContextSetCfg::MergeFlag = ContextSetCfg::addCtxSet ({ - { 111, }, - { 111, }, - { 153, }, - { 5, }, + { 6, }, + { 6, }, + { 26, }, + { 4, }, +}); + +const CtxSet ContextSetCfg::RegularMergeFlag = ContextSetCfg::addCtxSet +({ + { 46, 15, }, + { 38, 7, }, + { CNU, CNU, }, + { 5, 5, }, }); const CtxSet ContextSetCfg::MergeIdx = ContextSetCfg::addCtxSet ({ - { 138, }, - { 154, }, - { 153, }, - { 8, }, + { 33, }, + { 35, }, + { 34, }, + { 4, }, }); const CtxSet ContextSetCfg::MmvdFlag = ContextSetCfg::addCtxSet ({ - { 120, }, - { 122, }, + { 40, }, + { 26, }, { CNU, }, - { 8, }, + { 4, }, }); const CtxSet ContextSetCfg::MmvdMergeIdx = ContextSetCfg::addCtxSet ({ - { 154, }, - { 154, }, + { 43, }, + { 43, }, { CNU, }, - { 10, }, + { 10, }, }); const CtxSet ContextSetCfg::MmvdStepMvpIdx = ContextSetCfg::addCtxSet ({ - { 213, }, - { 244, }, + { 51, }, + { 60, }, { CNU, }, - { 1, }, -}); - -const CtxSet ContextSetCfg::PartSize = ContextSetCfg::addCtxSet -({ - { CNU, CNU, CNU, CNU,}, - { CNU, CNU, CNU, CNU,}, - { CNU, CNU, CNU, CNU,}, - { DWS, DWS, DWS, DWS, } + { 0, }, }); const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet ({ - { 192, 168, }, - { 165, 139, }, + { 40, 35, }, + { 40, 35, }, { CNU, CNU, 
}, - { 5, 2, }, + { 5, 1, }, }); const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet ({ - { 90, 212, CNU, }, - { 118, 212, CNU, }, - { 119, 169, CNU, }, - { 8, 8, DWS, }, + { 25, 58, }, + { 25, 50, }, + { 25, 59, }, + { 6, 8, }, }); const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet ({ - { 154, }, - { 154, }, - { 170, }, - { 6, }, + { 29, }, + { 36, }, + { 45, }, + { 6, }, +}); + +const CtxSet ContextSetCfg::IntraLumaPlanarFlag = ContextSetCfg::addCtxSet +({ + { 13, 6, }, + { 12, 20, }, + { 13, 28, }, + { 1, 5, }, +}); + +const CtxSet ContextSetCfg::CclmModeFlag = ContextSetCfg::addCtxSet +({ + { 26, }, + { 34, }, + { 59, }, + { 4, }, +}); + +const CtxSet ContextSetCfg::CclmModeIdx = ContextSetCfg::addCtxSet +({ + { 27, }, + { 27, }, + { 27, }, + { 9, }, }); const CtxSet ContextSetCfg::IntraChromaPredMode = ContextSetCfg::addCtxSet ({ - { 137, 139, 140,}, - { 138, 139, 169,}, - { 154, 139, 154,}, - { 5, 8, 9,}, + { 25, }, + { 18, }, + { 34, }, + { 5, }, +}); + +const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet +({ + { 56, 57, 50, 26, }, + { 41, 57, 58, 26, }, + { 33, 49, 42, 25, }, + { 9, 10, 9, 6, }, }); const CtxSet ContextSetCfg::DeltaQP = ContextSetCfg::addCtxSet ({ - { 154, 154, 154,}, - { 154, 154, 154,}, - { 154, 154, 154,}, - { DWS, DWS, DWS, } + { CNU, CNU, }, + { CNU, CNU, }, + { CNU, CNU, }, + { DWS, DWS, }, }); const CtxSet ContextSetCfg::InterDir = ContextSetCfg::addCtxSet ({ - { 111, 125, 110, 94, 192, }, - { 126, 111, 110, 94, 208, }, - { CNU, CNU, CNU, CNU, CNU, }, - { 0, 0, 4, 5, 0, }, + { 14, 6, 5, 4, 3, 40, }, + { 7, 6, 5, 4, 11, 40, }, + { CNU, CNU, CNU, CNU, CNU, CNU, }, + { 0, 0, 1, 4, 1, 0, }, }); const CtxSet ContextSetCfg::RefPic = ContextSetCfg::addCtxSet ({ - { 125, 139, }, - { 138, 168, }, + { 20, 20, }, + { 27, 35, }, { CNU, CNU, }, - { 4, 5, }, + { 0, 4, }, +}); + +const CtxSet ContextSetCfg::SubblockMergeFlag = ContextSetCfg::addCtxSet +({ + { 40, 51, 45, }, + { 48, 57, 44, }, + { 
CNU, CNU, CNU, }, + { 4, 4, 4, }, }); const CtxSet ContextSetCfg::AffineFlag = ContextSetCfg::addCtxSet ({ - { 179, 169, 171, }, - { 180, 168, 155, }, + { 12, 13, 6, }, + { 12, 13, 6, }, { CNU, CNU, CNU, }, - { 8, 5, 4, }, + { 4, 0, 0, }, }); const CtxSet ContextSetCfg::AffineType = ContextSetCfg::addCtxSet ({ - { 138, }, - { 153, }, + { 35, }, + { 35, }, { CNU, }, - { 4, }, + { 4, }, }); const CtxSet ContextSetCfg::AffMergeIdx = ContextSetCfg::addCtxSet -( { - { 109, }, - { 95, }, +({ + { 4, }, + { 5, }, { CNU, }, - { 0, }, -} ); + { 0, }, +}); -const CtxSet ContextSetCfg::GBiIdx = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::BcwIdx = ContextSetCfg::addCtxSet ({ - // 4 ctx for 1st bin; 1 ctx for each of rest bins - { 228, CNU, CNU, CNU, 125, 155, 175, }, - { 242, CNU, CNU, CNU, 154, 170, 237, }, - { CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, - { 4, DWS, DWS, DWS, 4, 0, 0, }, + { 5, }, + { 4, }, + { CNU, }, + { 0, }, }); const CtxSet ContextSetCfg::Mvd = ContextSetCfg::addCtxSet ({ - { 169, 183, }, - { 155, 154, }, - { 141, 156, }, - { 9, 5, }, + { 51, 58, }, + { 44, 43, }, + { 14, 45, }, + { 9, 5, }, +}); + +const CtxSet ContextSetCfg::BDPCMMode = ContextSetCfg::addCtxSet +({ + { 19, 28, }, + { 40, 36, }, + { 19, 35, }, + { 4, 4, }, }); const CtxSet ContextSetCfg::QtRootCbf = ContextSetCfg::addCtxSet ({ - { 109, }, - { 95, }, - { 110, }, - { 4, }, + { 12, }, + { 5, }, + { 6, }, + { 4, }, }); +const CtxSet ContextSetCfg::ACTFlag = ContextSetCfg::addCtxSet +({ + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, + }); + const CtxSet ContextSetCfg::QtCbf[] = { ContextSetCfg::addCtxSet ({ - { 141, 127, 139, 140, }, - { 142, 127, 139, 140, }, - { CNU, 111, 124, 111, }, - { 1, 5, 9, 8, }, + { 15, 13, 5, 14, }, + { 23, 4, 20, 7, }, + { 7, 19, 5, 7, }, + { 5, 1, 8, 9, }, }), ContextSetCfg::addCtxSet ({ - { 163, 154, CNU, CNU, CNU, }, - { 164, 154, CNU, CNU, CNU, }, - { 109, CNU, CNU, CNU, CNU, }, - { 5, 8, DWS, DWS, DWS, }, + { 25, CNU, }, + { 25, CNU, }, + { 4, CNU, }, + 
{ 5, DWS, }, }), ContextSetCfg::addCtxSet ({ - { 161, 154, }, - { 192, 154, }, - { 151, 155, }, - { 5, 5, }, - }), + { 9, 44, CNU, }, + { 25, 29, CNU, }, + { 33, 28, CNU, }, + { 2, 1, DWS, }, + }) }; const CtxSet ContextSetCfg::SigCoeffGroup[] = { ContextSetCfg::addCtxSet ({ - { 105, 155, }, - { 106, 156, }, - { 107, 158, }, - { 8, 5, }, + { 25, 45, }, + { 25, 30, }, + { 18, 31, }, + { 8, 5, }, }), ContextSetCfg::addCtxSet ({ - { 91, 155, }, - { 90, 141, }, - { 76, 127, }, - { 5, 8, }, - }), - ContextSetCfg::addCtxSet - ({ - { CNU, CNU, }, - { CNU, CNU, }, - { CNU, CNU, }, - { DWS, DWS, } - }), - ContextSetCfg::addCtxSet - ({ - { CNU, CNU, }, - { CNU, CNU, }, - { CNU, CNU, }, - { DWS, DWS, } - }), + { 25, 45, }, + { 25, 52, }, + { 25, 7, }, + { 5, 8, }, + }) }; const CtxSet ContextSetCfg::SigFlag[] = { ContextSetCfg::addCtxSet ({ - { 88, 166, 152, 182, 168, 154, 0, 167, 182, 168, 183, 155, 193, 213, 183, 183, 169, 185, }, - { 132, 152, 167, 168, 183, 140, 177, 182, 168, 154, 169, 155, 180, 213, 183, 169, 184, 156, }, - { 89, 138, 153, 139, 154, 140, 134, 139, 139, 140, 140, 141, 137, 170, 169, 170, 141, 157, }, - { 12, 9, 9, 9, 9, 10, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 9, }, + { 17, 41, 49, 36, 1, 49, 50, 37, 48, 51, 58, 45, }, + { 17, 41, 42, 29, 25, 49, 43, 37, 33, 51, 51, 30, }, + { 25, 19, 28, 14, 25, 20, 29, 30, 19, 37, 30, 38, }, + { 12, 9, 9, 10, 9, 9, 9, 10, 8, 8, 8, 10, }, }), ContextSetCfg::addCtxSet ({ - { 72, 167, 153, 168, 154, 155, 180, 199, 183, 199, 199, 186, }, - { 133, 138, 153, 139, 154, 140, 181, 229, 169, 229, 170, 157, }, - { 43, 153, 168, 169, 154, 155, 152, 215, 155, 201, 171, 143, }, - { 9, 9, 12, 9, 13, 13, 5, 5, 8, 8, 8, 9, }, + { 9, 49, 42, 21, 48, 59, 59, 53, }, + { 17, 19, 20, 29, 41, 59, 60, 38, }, + { 25, 27, 28, 37, 49, 53, 53, 46, }, + { 9, 9, 9, 13, 4, 5, 8, 9, }, }), ContextSetCfg::addCtxSet ({ - { 152, 156, 201, 186, 186, 187, 182, 248, 188, 232, 188, 205, 182, 223, 223, 223, 223, 223, }, - { 123, 142, 157, 172, 172, 218, 138, 
249, 248, 248, 219, 223, 139, 223, 223, 223, 223, 223, }, - { 93, 142, 157, 143, 188, 175, 138, 238, 205, 238, 253, 237, 139, 223, 223, 223, 223, 253, }, - { 9, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 0, 0, 0, 0, 0, }, + { 26, 45, 53, 46, 49, 54, 61, 39, 42, 39, 39, 39, }, + { 19, 38, 38, 46, 34, 54, 54, 39, 6, 39, 39, 39, }, + { 11, 38, 46, 54, 27, 39, 39, 39, 36, 39, 39, 39, }, + { 9, 13, 8, 8, 8, 8, 8, 5, 8, 0, 0, 0, }, }), ContextSetCfg::addCtxSet ({ - { 182, 171, 143, 158, 172, 189, 183, 223, 223, 223, 223, 223, }, - { 168, 156, 173, 216, 172, 219, 169, 223, 223, 223, 223, 223, }, - { 152, 173, 157, 187, 204, 253, 170, 223, 223, 223, 223, 223, }, - { 8, 9, 12, 8, 8, 8, 4, 0, 2, 2, 2, 2, }, + { 34, 45, 38, 31, 58, 39, 39, 39, }, + { 35, 45, 53, 54, 44, 39, 39, 39, }, + { 19, 46, 38, 39, 52, 39, 39, 39, }, + { 8, 12, 8, 8, 4, 0, 0, 0, }, }), ContextSetCfg::addCtxSet ({ - { 123, 173, 223, 191, 232, 251, 212, 223, 223, 236, 206, 223, 192, 223, 223, 223, 223, 223, }, - { 123, 175, 223, 175, 218, 223, 138, 223, 223, 223, 222, 223, 196, 223, 223, 223, 223, 223, }, - { 107, 174, 223, 238, 251, 223, 63, 223, 223, 238, 223, 238, 12, 223, 223, 223, 223, 223, }, - { 8, 8, 4, 8, 8, 8, 8, 0, 0, 4, 8, 5, 4, 2, 2, 2, 2, 1, }, + { 19, 54, 39, 39, 50, 39, 39, 39, 0, 39, 39, 39, }, + { 19, 39, 54, 39, 19, 39, 39, 39, 56, 39, 39, 39, }, + { 18, 39, 39, 39, 19, 39, 39, 39, 0, 39, 39, 39, }, + { 8, 8, 8, 8, 8, 0, 4, 4, 0, 0, 0, 0, }, }), ContextSetCfg::addCtxSet ({ - { 167, 201, 223, 248, 219, 223, 181, 223, 223, 223, 223, 223, }, - { 167, 171, 223, 175, 248, 223, 152, 223, 223, 223, 223, 223, }, - { 166, 234, 223, 236, 248, 223, 108, 223, 223, 223, 223, 223, }, - { 8, 8, 5, 8, 8, 8, 5, 1, 2, 2, 2, 2, }, - }), + { 34, 38, 54, 39, 41, 39, 39, 39, }, + { 34, 38, 62, 39, 26, 39, 39, 39, }, + { 26, 39, 39, 39, 19, 39, 39, 39, }, + { 8, 8, 8, 8, 4, 0, 0, 0, }, + }) }; - const CtxSet ContextSetCfg::ParFlag[] = { ContextSetCfg::addCtxSet ({ - { 121, 105, 136, 152, 138, 183, 90, 122, 167, 
153, 168, 135, 152, 153, 168, 139, 151, 153, 139, 168, 154, }, - { 121, 119, 136, 137, 138, 153, 104, 122, 138, 153, 139, 106, 138, 153, 168, 139, 137, 153, 168, 139, 139, }, - { 121, 135, 137, 152, 138, 153, 91, 137, 138, 153, 139, 151, 138, 153, 139, 139, 138, 168, 139, 154, 139, }, - { 8, 9, 12, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13, }, + { 33, 40, 25, 41, 26, 42, 25, 33, 26, 34, 27, 25, 41, 42, 42, 35, 33, 27, 35, 42, 43, }, + { 18, 17, 33, 18, 34, 42, 25, 33, 26, 42, 27, 25, 34, 42, 42, 35, 26, 27, 42, 20, 20, }, + { 33, 25, 18, 26, 34, 27, 25, 26, 19, 42, 35, 33, 19, 27, 35, 35, 34, 42, 20, 43, 20, }, + { 8, 9, 12, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13, }, }), ContextSetCfg::addCtxSet ({ - { 151, 120, 152, 138, 153, 153, 136, 168, 154, 168, 154, }, - { 135, 120, 137, 138, 138, 153, 136, 153, 168, 139, 154, }, - { 136, 135, 152, 153, 138, 153, 136, 168, 154, 139, 154, }, - { 8, 10, 12, 12, 13, 13, 10, 10, 13, 13, 13, }, - }), + { 33, 25, 26, 19, 19, 27, 33, 42, 43, 35, 43, }, + { 25, 25, 26, 11, 19, 27, 33, 42, 50, 20, 43, }, + { 33, 25, 26, 42, 19, 27, 26, 50, 35, 20, 43, }, + { 9, 13, 12, 12, 13, 13, 13, 13, 13, 13, 13, }, + }) }; const CtxSet ContextSetCfg::GtxFlag[] = { ContextSetCfg::addCtxSet ({ - { 31, 73, 118, 75, 152, 109, 42, 44, 105, 107, 109, 0, 119, 136, 152, 124, 118, 136, 138, 153, 140, }, - { 14, 116, 86, 119, 106, 152, 0, 72, 120, 151, 138, 116, 90, 107, 152, 153, 104, 107, 123, 153, 154, }, - { 90, 72, 119, 135, 137, 138, 43, 60, 106, 137, 109, 58, 106, 108, 109, 124, 121, 138, 139, 154, 155, }, - { 4, 1, 8, 8, 4, 2, 5, 9, 9, 8, 9, 9, 9, 9, 8, 9, 9, 8, 9, 8, 8, }, + { 25, 0, 0, 17, 25, 18, 0, 9, 25, 33, 19, 0, 25, 33, 26, 20, 25, 33, 34, 35, 29, }, + { 17, 0, 1, 17, 25, 18, 0, 9, 25, 33, 34, 9, 25, 18, 26, 20, 25, 18, 19, 27, 21, }, + { 25, 1, 40, 25, 33, 11, 17, 25, 25, 18, 4, 17, 33, 26, 19, 5, 33, 19, 20, 28, 22, }, + { 1, 5, 9, 9, 9, 6, 5, 9, 10, 10, 9, 9, 9, 9, 9, 9, 6, 8, 
9, 9, 9, }, }), ContextSetCfg::addCtxSet ({ - { 119, 101, 134, 151, 107, 123, 118, 122, 124, 140, 155, }, - { 117, 0, 90, 106, 92, 93, 147, 136, 138, 154, 140, }, - { 194, 40, 120, 122, 122, 138, 103, 121, 153, 154, 155, }, - { 2, 5, 8, 8, 8, 6, 6, 8, 8, 8, 7, }, + { 25, 1, 40, 33, 26, 4, 25, 33, 27, 36, 37, }, + { 17, 9, 25, 10, 3, 4, 17, 33, 19, 28, 29, }, + { 48, 9, 25, 18, 26, 35, 25, 26, 35, 28, 37, }, + { 1, 5, 8, 8, 8, 6, 6, 9, 8, 8, 10, }, }), ContextSetCfg::addCtxSet ({ - { 43, 177, 181, 168, 154, 170, 133, 167, 139, 154, 155, 164, 153, 154, 169, 155, 181, 183, 169, 185, 186, }, - { 101, 133, 137, 153, 139, 140, 134, 138, 139, 154, 155, 136, 153, 154, 140, 170, 138, 154, 155, 170, 186, }, - { 134, 120, 123, 153, 139, 140, 92, 124, 154, 125, 111, 138, 154, 140, 155, 141, 154, 140, 185, 171, 143, }, - { 8, 5, 9, 9, 12, 9, 9, 10, 13, 12, 10, 9, 10, 10, 10, 10, 8, 9, 8, 8, 10, }, + { 0, 0, 33, 34, 35, 36, 25, 34, 35, 28, 29, 40, 42, 43, 36, 30, 56, 43, 44, 45, 38, }, + { 0, 17, 26, 19, 20, 21, 25, 34, 20, 28, 29, 33, 27, 28, 29, 22, 34, 28, 44, 37, 38, }, + { 25, 25, 11, 27, 20, 21, 18, 12, 28, 21, 22, 34, 28, 29, 29, 30, 28, 29, 45, 30, 23, }, + { 9, 5, 10, 13, 13, 10, 9, 10, 13, 13, 13, 9, 10, 10, 10, 13, 8, 9, 9, 10, 13, }, }), ContextSetCfg::addCtxSet ({ - { 0, 178, 153, 154, 140, 140, 196, 170, 186, 157, 188, }, - { 0, 135, 153, 139, 125, 140, 182, 155, 156, 142, 159, }, - { 163, 136, 153, 154, 125, 140, 183, 170, 201, 187, 174, }, - { 6, 9, 10, 12, 12, 10, 5, 9, 8, 8, 9, }, - }), + { 0, 40, 42, 20, 21, 29, 57, 52, 53, 38, 46, }, + { 0, 25, 27, 20, 13, 6, 57, 52, 30, 38, 31, }, + { 40, 33, 27, 28, 21, 37, 51, 37, 53, 38, 46, }, + { 9, 9, 10, 12, 12, 10, 5, 9, 9, 9, 12, }, + }) }; const CtxSet ContextSetCfg::LastX[] = { ContextSetCfg::addCtxSet ({ - { 111, 111, 110, 111, 111, 139, 111, 126, 111, 139, 126, 126, 111, 111, 169, 154, 111, 110, 110, 139, CNU, CNU, CNU, CNU, CNU, }, - { 125, 110, 109, 125, 125, 123, 111, 111, 95, 123, 126, 111, 110, 95, 169, 
154, 140, 139, 139, 138, CNU, CNU, CNU, CNU, CNU, }, - { 125, 140, 124, 111, 111, 109, 111, 126, 125, 123, 111, 141, 111, 125, 79, 155, 142, 170, 140, 183, CNU, CNU, CNU, CNU, CNU, }, - { 8, 5, 5, 5, 4, 4, 5, 4, 4, 0, 5, 1, 0, 0, 0, 1, 1, 0, 0, 0, DWS, DWS, DWS, DWS, DWS, }, + { 14, 6, 5, 7, 14, 4, 7, 7, 6, 12, 29, 7, 6, 6, 20, 28, 7, 13, 13, 20, }, + { 6, 13, 12, 6, 6, 4, 14, 14, 5, 12, 29, 14, 13, 5, 36, 28, 14, 13, 20, 19, }, + { 13, 5, 4, 6, 14, 4, 6, 14, 21, 11, 14, 7, 14, 13, 11, 21, 37, 37, 21, 50, }, + { 8, 5, 4, 5, 4, 4, 5, 4, 1, 0, 4, 1, 0, 0, 0, 1, 1, 0, 0, 0, }, }), ContextSetCfg::addCtxSet ({ - { 122, 124, 63, CNU, }, - { 138, 123, 92, CNU, }, - { 138, 108, 47, CNU, }, - { 2, 1, 1, DWS, }, - }), + { 11, 5, 3, }, + { 12, 4, 18, }, + { 12, 4, 3, }, + { 2, 1, 1, }, + }) }; const CtxSet ContextSetCfg::LastY[] = { ContextSetCfg::addCtxSet ({ - { 125, 125, 139, 125, 111, 139, 111, 111, 110, 110, 140, 126, 125, 125, 140, 139, 111, 110, 124, 181, CNU, CNU, CNU, CNU, CNU, }, - { 95, 95, 109, 110, 110, 108, 125, 111, 124, 123, 140, 111, 110, 124, 139, 125, 126, 110, 124, 182, CNU, CNU, CNU, CNU, CNU, }, - { 110, 110, 109, 125, 111, 123, 111, 126, 95, 108, 111, 127, 111, 95, 78, 169, 157, 141, 125, 138, CNU, CNU, CNU, CNU, CNU, }, - { 8, 5, 8, 5, 5, 4, 5, 5, 4, 0, 5, 5, 1, 0, 0, 1, 4, 1, 0, 0, DWS, DWS, DWS, DWS, DWS, }, + { 13, 5, 5, 6, 6, 12, 14, 6, 5, 5, 14, 7, 5, 12, 21, 13, 7, 13, 12, 41, }, + { 5, 5, 12, 6, 6, 19, 6, 14, 5, 19, 29, 7, 13, 5, 36, 21, 7, 13, 5, 27, }, + { 13, 5, 4, 6, 6, 11, 14, 14, 5, 11, 14, 22, 14, 12, 3, 21, 37, 52, 28, 34, }, + { 8, 5, 8, 5, 5, 4, 5, 5, 4, 0, 5, 5, 1, 0, 0, 1, 4, 0, 0, 0, }, }), ContextSetCfg::addCtxSet ({ - { 122, 124, 123, CNU, }, - { 108, 123, 121, CNU, }, - { 123, 123, 91, CNU, }, - { 2, 2, 2, DWS, }, - }), + { 11, 5, 19, }, + { 11, 4, 18, }, + { 12, 4, 3, }, + { 6, 2, 2, }, + }) }; - const CtxSet ContextSetCfg::MVPIdx = ContextSetCfg::addCtxSet ({ - { 153, }, - { 168, }, - { 168, }, - { 10, }, + { 34, }, + { 34, }, 
+ { 42, }, + { 12, }, }); const CtxSet ContextSetCfg::SmvdFlag = ContextSetCfg::addCtxSet -( { - { 154, }, - { 125, }, +({ + { 50, }, + { 28, }, { CNU, }, - { 8, }, -} ); + { 5, }, +}); const CtxSet ContextSetCfg::SaoMergeFlag = ContextSetCfg::addCtxSet ({ - { 47, }, - { 244, }, - { 199, }, - { 0, }, + { 10, }, + { 60, }, + { 52, }, + { 0, }, }); const CtxSet ContextSetCfg::SaoTypeIdx = ContextSetCfg::addCtxSet ({ - { 47, }, - { 95, }, - { 95, }, - { 0, }, + { 10, }, + { 5, }, + { 5, }, + { 0, }, }); +const CtxSet ContextSetCfg::LFNSTIdx = ContextSetCfg::addCtxSet +({ + { 52, 37, 33, }, + { 45, 45, 18, }, + { CNU, 52, 33, }, + { 9, 9, 5, }, +}); -const CtxSet ContextSetCfg::TransquantBypassFlag = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::PLTFlag = ContextSetCfg::addCtxSet ({ - { 154,}, - { 154,}, - { 154,}, - { DWS, } + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, +}); + +const CtxSet ContextSetCfg::RotationFlag = ContextSetCfg::addCtxSet +({ + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, +}); + +const CtxSet ContextSetCfg::RunTypeFlag = ContextSetCfg::addCtxSet +({ + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, +}); + +const CtxSet ContextSetCfg::IdxRunModel = ContextSetCfg::addCtxSet +({ + { CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, DWS, }, +}); + +const CtxSet ContextSetCfg::CopyRunModel = ContextSetCfg::addCtxSet +({ + { CNU, CNU, CNU, }, + { CNU, CNU, CNU, }, + { CNU, CNU, CNU, }, + { DWS, DWS, DWS, }, }); const CtxSet ContextSetCfg::RdpcmFlag = ContextSetCfg::addCtxSet ({ - { 139, 139,}, - { 139, 139,}, - { CNU, CNU,}, - { DWS, DWS, } + { CNU, CNU, }, + { CNU, CNU, }, + { CNU, CNU, }, + { DWS, DWS, }, }); const CtxSet ContextSetCfg::RdpcmDir = ContextSetCfg::addCtxSet ({ - { 139, 139,}, - { 139, 139,}, - { CNU, CNU,}, - { DWS, DWS, } + { CNU, CNU, }, + { CNU, CNU, }, + { CNU, CNU, }, + { DWS, DWS, }, +}); + +const CtxSet ContextSetCfg::TransformSkipFlag = 
ContextSetCfg::addCtxSet +({ + { 25, 17, }, + { 25, 17, }, + { 25, 1, }, + { 1, 1, }, }); -const CtxSet ContextSetCfg::MTSIndex = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet ({ - { CNU, 155, 155, 140, 140, CNU, 216, 153, 153, 0, CNU, }, - { CNU, 155, 155, 140, 140, CNU, 233, 167, 153, 0, CNU, }, - { CNU, CNU, 140, 140, 140, CNU, 219, 138, 153, 0, CNU, }, - { DWS, 8, 8, 8, 8, DWS, 4, 8, 9, 3, DWS, }, + { 37, 25, 27, 0, }, + { 30, 40, 27, 0, }, + { 13, 0, 35, 0, }, + { 8, 0, 9, 0, }, }); const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet ({ - { 152, 154, }, - { 166, 154, }, - { 152, 154, }, - { 8, 5, }, + { 33, 43, }, + { 33, 36, }, + { 33, 43, }, + { 9, 2, }, }); const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet -( { - { 168, 183, }, - { 197, 183, }, +({ + { 49, 50, }, + { 49, 50, }, { CNU, CNU, }, - { 4, 8, }, -} ); + { 1, 5, }, +}); const CtxSet ContextSetCfg::SbtQuadFlag = ContextSetCfg::addCtxSet -( { - { 168, }, - { 168, }, +({ + { 42, }, + { 42, }, { CNU, }, - { 9, }, -} ); + { 10, }, +}); const CtxSet ContextSetCfg::SbtHorFlag = ContextSetCfg::addCtxSet -( { - { 139, 154, 139, }, - { 139, 154, 139, }, +({ + { 35, 51, 27, }, + { 20, 43, 12, }, { CNU, CNU, CNU, }, - { 8, 5, 4, }, -} ); + { 8, 4, 1, }, +}); const CtxSet ContextSetCfg::SbtPosFlag = ContextSetCfg::addCtxSet -( { - { 154, }, - { 154, }, +({ + { 28, }, + { 28, }, { CNU, }, - { 13, }, -} ); + { 13, }, +}); const CtxSet ContextSetCfg::CrossCompPred = ContextSetCfg::addCtxSet ({ - { 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,}, - { 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,}, - { 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,}, - { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, } + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, }, + { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, }, }); const 
CtxSet ContextSetCfg::ChromaQpAdjFlag = ContextSetCfg::addCtxSet ({ - { 154,}, - { 154,}, - { 154,}, - { DWS, } + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, }); const CtxSet ContextSetCfg::ChromaQpAdjIdc = ContextSetCfg::addCtxSet ({ - { 154,}, - { 154,}, - { 154,}, - { DWS, } + { CNU, }, + { CNU, }, + { CNU, }, + { DWS, }, }); const CtxSet ContextSetCfg::ImvFlag = ContextSetCfg::addCtxSet ({ - { 212, 199, 215, 180, 183, 242, }, - { 213, 229, 244, 166, 198, 244, }, - { CNU, CNU, CNU, 152, CNU, CNU, }, - { 1, 4, 4, 5, 1, 0, }, + { 51, 33, 50, 60, 45, }, + { 59, 48, 58, 60, 60, }, + { CNU, 34, CNU, CNU, CNU, }, + { 0, 5, 1, 0, 4, }, }); -const CtxSet ContextSetCfg::ctbAlfFlag = -{ - ContextSetCfg::addCtxSet - ( { - { 154, 186, 174, 183, 233, 250, 168, 248, 250, }, - { 139, 186, 203, 183, 247, 249, 183, 232, 249, }, - { 219, 236, 238, 232, 249, 235, 246, 234, 251, }, - { 0, 0, 4, 0, 0, 1, 0, 0, 1, }, - } ) -}; - -const CtxSet ContextSetCfg::MHIntraFlag = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::ctbAlfFlag = ContextSetCfg::addCtxSet ({ - { 225, }, - { 197, }, - { CNU, }, - { 1, }, + { 26, 52, 46, 18, 61, 54, 18, 61, 54, }, + { 6, 23, 46, 12, 61, 54, 5, 46, 54, }, + { 39, 39, 39, 62, 39, 39, 31, 39, 39, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, }, }); -const CtxSet ContextSetCfg::MHIntraPredMode = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::ctbAlfAlternative = ContextSetCfg::addCtxSet ({ - { 156, CNU, CNU, CNU, }, - { 156, CNU, CNU, CNU, }, - { CNU, CNU, CNU, CNU, }, - { 9, DWS, DWS, DWS, }, + { 11, 11, }, + { 20, 12, }, + { 28, 28, }, + { 0, 0, }, }); -const CtxSet ContextSetCfg::TriangleFlag = ContextSetCfg::addCtxSet + +const CtxSet ContextSetCfg::AlfUseTemporalFilt = ContextSetCfg::addCtxSet ({ - { 149, 123, 123, }, - { 151, 152, 138, }, - { CNU, CNU, CNU, }, - { 8, 12, 9, }, + { 46, }, + { 53, }, + { 46, }, + { 0, }, }); -const CtxSet ContextSetCfg::TriangleIdx = ContextSetCfg::addCtxSet +const CtxSet ContextSetCfg::CiipFlag = 
ContextSetCfg::addCtxSet ({ + { 50, }, + { 50, }, { CNU, }, - { CNU, }, - { CNU, }, - { DWS, }, + { 1, }, }); -// clang-format on const CtxSet ContextSetCfg::IBCFlag = ContextSetCfg::addCtxSet ({ - { 0, 154, 141, }, - { 0, 153, 140, }, - { 132, 153, 125, }, - { 5, 5, 8, }, + { 0, 43, 45, }, + { 0, 42, 37, }, + { 17, 42, 36, }, + { 1, 5, 8, }, }); +const CtxSet ContextSetCfg::JointCbCrFlag = ContextSetCfg::addCtxSet +({ + { 43, 51, 45, }, + { 35, 44, 45, }, + { 35, 29, 51, }, + { 1, 1, 0, }, +}); + +const CtxSet ContextSetCfg::TsSigCoeffGroup = ContextSetCfg::addCtxSet +({ + { 18, 35, 37, }, + { 18, 12, 29, }, + { 18, 20, 38, }, + { 5, 8, 8, }, +}); + +const CtxSet ContextSetCfg::TsSigFlag = ContextSetCfg::addCtxSet +({ + { 25, 50, 37, }, + { 40, 35, 44, }, + { 25, 28, 38, }, + { 13, 13, 8, }, +}); + +const CtxSet ContextSetCfg::TsParFlag = ContextSetCfg::addCtxSet +({ + { 11, }, + { 3, }, + { 11, }, + { 6, }, +}); + +const CtxSet ContextSetCfg::TsGtxFlag = ContextSetCfg::addCtxSet +({ + { CNU, 10, 4, 4, 5, }, + { CNU, 2, 3, 3, 11, }, + { CNU, 10, 3, 3, 3, }, + { DWS, 1, 1, 1, 1, }, +}); + +const CtxSet ContextSetCfg::TsLrg1Flag = ContextSetCfg::addCtxSet +({ + { 19, 11, 4, 6, }, + { 18, 11, 4, 28, }, + { 11, 5, 5, 6, }, + { 4, 2, 1, 6, }, +}); + +const CtxSet ContextSetCfg::TsResidualSign = ContextSetCfg::addCtxSet +({ + { 28, 25, 53, 28, 33, 30, }, + { 5, 10, 53, 35, 25, 53, }, + { 20, 17, 46, 20, 25, 46, }, + { 1, 4, 4, 8, 8, 8, }, +}); +// clang-format on + const unsigned ContextSetCfg::NumberOfContexts = (unsigned)ContextSetCfg::sm_InitTables[0].size(); // combined sets +const CtxSet ContextSetCfg::Palette = { ContextSetCfg::RotationFlag, ContextSetCfg::RunTypeFlag, ContextSetCfg::IdxRunModel, ContextSetCfg::CopyRunModel }; const CtxSet ContextSetCfg::Sao = { ContextSetCfg::SaoMergeFlag, ContextSetCfg::SaoTypeIdx }; - +const CtxSet ContextSetCfg::Alf = { ContextSetCfg::ctbAlfFlag, ContextSetCfg::ctbAlfAlternative, ContextSetCfg::AlfUseTemporalFilt }; template 
<class BinProbModel> CtxStore<BinProbModel>::CtxStore() diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index e52842dead764732f268c30aadecc35b3fe2b18d..e831206ce0d8cb2ab264fcfc706bff726236383e 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,6 @@ class ProbModelTables { protected: static const BinFracBits m_binFracBits[256]; - static const uint16_t m_inistateToCount[128]; static const uint8_t m_RenormTable_32 [ 32]; // Std MP MPI }; @@ -199,37 +198,57 @@ public: static const CtxSet SplitQtFlag; static const CtxSet SplitHvFlag; static const CtxSet Split12Flag; + static const CtxSet ModeConsFlag; static const CtxSet SkipFlag; static const CtxSet MergeFlag; + static const CtxSet RegularMergeFlag; static const CtxSet MergeIdx; - static const CtxSet PartSize; static const CtxSet PredMode; static const CtxSet MultiRefLineIdx; static const CtxSet IntraLumaMpmFlag; + static const CtxSet IntraLumaPlanarFlag; + static const CtxSet CclmModeFlag; + static const CtxSet CclmModeIdx; static const CtxSet IntraChromaPredMode; + static const CtxSet MipFlag; static const CtxSet DeltaQP; static const CtxSet InterDir; static const CtxSet RefPic; static const CtxSet MmvdFlag; static const CtxSet MmvdMergeIdx; static const CtxSet MmvdStepMvpIdx; + static const CtxSet SubblockMergeFlag; static const CtxSet AffineFlag; static const CtxSet AffineType; static const CtxSet AffMergeIdx; static const CtxSet Mvd; + static const CtxSet BDPCMMode; static const CtxSet QtRootCbf; + static const CtxSet ACTFlag; static const CtxSet QtCbf [3]; // [ channel ] - static const CtxSet SigCoeffGroup [4]; // [ ChannelType ] + static const 
CtxSet SigCoeffGroup [2]; // [ ChannelType ] static const CtxSet LastX [2]; // [ ChannelType ] static const CtxSet LastY [2]; // [ ChannelType ] static const CtxSet SigFlag [6]; // [ ChannelType + State ] static const CtxSet ParFlag [2]; // [ ChannelType ] static const CtxSet GtxFlag [4]; // [ ChannelType + x ] + static const CtxSet TsSigCoeffGroup; + static const CtxSet TsSigFlag; + static const CtxSet TsParFlag; + static const CtxSet TsGtxFlag; + static const CtxSet TsLrg1Flag; + static const CtxSet TsResidualSign; static const CtxSet MVPIdx; static const CtxSet SaoMergeFlag; static const CtxSet SaoTypeIdx; - static const CtxSet MTSIndex; - static const CtxSet TransquantBypassFlag; + static const CtxSet TransformSkipFlag; + static const CtxSet MTSIdx; + static const CtxSet LFNSTIdx; + static const CtxSet PLTFlag; + static const CtxSet RotationFlag; + static const CtxSet RunTypeFlag; + static const CtxSet IdxRunModel; + static const CtxSet CopyRunModel; static const CtxSet RdpcmFlag; static const CtxSet RdpcmDir; static const CtxSet SbtFlag; @@ -240,21 +259,23 @@ public: static const CtxSet ChromaQpAdjFlag; static const CtxSet ChromaQpAdjIdc; static const CtxSet ImvFlag; - static const CtxSet GBiIdx; + static const CtxSet BcwIdx; static const CtxSet ctbAlfFlag; - static const CtxSet MHIntraFlag; - static const CtxSet MHIntraPredMode; - static const CtxSet TriangleFlag; - static const CtxSet TriangleIdx; + static const CtxSet ctbAlfAlternative; + static const CtxSet AlfUseTemporalFilt; + static const CtxSet CiipFlag; static const CtxSet SmvdFlag; static const CtxSet IBCFlag; static const CtxSet ISPMode; + static const CtxSet JointCbCrFlag; static const unsigned NumberOfContexts; // combined sets for less complex copying // NOTE: The contained CtxSet's should directly follow each other in the initalization list; // otherwise, you will copy more elements than you want !!! 
static const CtxSet Sao; + static const CtxSet Alf; + static const CtxSet Palette; public: static const std::vector<uint8_t>& getInitTable( unsigned initId ); @@ -419,7 +440,7 @@ private: CtxStore<BinProbModel_Std> m_CtxStore_Std; protected: unsigned m_GRAdaptStats[RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS]; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM public: int64_t cacheId; diff --git a/source/Lib/CommonLib/CrossCompPrediction.cpp b/source/Lib/CommonLib/CrossCompPrediction.cpp index ea637e7befec8514e47051d0cbe75e09baceb419..95d99cfc3c2fadfff5c0edc769090c708ddd0397 100644 --- a/source/Lib/CommonLib/CrossCompPrediction.cpp +++ b/source/Lib/CommonLib/CrossCompPrediction.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/CrossCompPrediction.h b/source/Lib/CommonLib/CrossCompPrediction.h index 5e35a662a6769747827c2a01469d3897c773b16b..f4bb776bd363db942a802d6bdb59c12d87813750 100644 --- a/source/Lib/CommonLib/CrossCompPrediction.h +++ b/source/Lib/CommonLib/CrossCompPrediction.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp index 8d46daf033252a13f098cbfa59a39d2e13198be8..af4a5757a7efbcb4a6074b813f4be1df3184acf1 100644 --- a/source/Lib/CommonLib/DepQuant.cpp +++ b/source/Lib/CommonLib/DepQuant.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -89,6 +89,10 @@ namespace DQIntern int nextSbbBelow; int posX; int posY; + ChannelType chType; + int sbtInfo; + int tuWidth; + int tuHeight; }; class Rom; @@ -128,19 +132,19 @@ namespace DQIntern Rom() : m_scansInitialized(false) {} ~Rom() { xUninitScanArrays(); } void init () { xInitScanArrays(); } - const NbInfoSbb* getNbInfoSbb( int hd, int vd, int ch ) const { return m_scanId2NbInfoSbbArray[hd][vd][ch]; } - const NbInfoOut* getNbInfoOut( int hd, int vd, int ch ) const { return m_scanId2NbInfoOutArray[hd][vd][ch]; } + const NbInfoSbb* getNbInfoSbb( int hd, int vd ) const { return m_scanId2NbInfoSbbArray[hd][vd]; } + const NbInfoOut* getNbInfoOut( int hd, int vd ) const { return m_scanId2NbInfoOutArray[hd][vd]; } const TUParameters* getTUPars ( const CompArea& area, const ComponentID compID ) const { - return m_tuParameters[g_aucLog2[area.width]][g_aucLog2[area.height]][toChannelType(compID)]; + return m_tuParameters[floorLog2(area.width)][floorLog2(area.height)][toChannelType(compID)]; } private: void xInitScanArrays (); void xUninitScanArrays (); private: bool m_scansInitialized; - NbInfoSbb* m_scanId2NbInfoSbbArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ]; - NbInfoOut* m_scanId2NbInfoOutArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ]; + NbInfoSbb* m_scanId2NbInfoSbbArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ]; + NbInfoOut* m_scanId2NbInfoOutArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ]; TUParameters* m_tuParameters [ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ]; }; @@ -157,8 +161,6 @@ namespace DQIntern uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ]; ::memset(raster2id, 0, sizeof(raster2id)); - for( int ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ ) - { for( int hd = 0; hd <= MAX_CU_DEPTH; hd++ ) { for( int vd = 0; vd <= MAX_CU_DEPTH; vd++ ) 
@@ -169,17 +171,17 @@ namespace DQIntern } const uint32_t blockWidth = (1 << hd); const uint32_t blockHeight = (1 << vd); - const uint32_t log2CGWidth = g_log2SbbSize[ch][hd][vd][0]; - const uint32_t log2CGHeight = g_log2SbbSize[ch][hd][vd][1]; + const uint32_t log2CGWidth = g_log2SbbSize[hd][vd][0]; + const uint32_t log2CGHeight = g_log2SbbSize[hd][vd][1]; const uint32_t groupWidth = 1 << log2CGWidth; const uint32_t groupHeight = 1 << log2CGHeight; const uint32_t groupSize = groupWidth * groupHeight; const CoeffScanType scanType = SCAN_DIAG; const SizeType blkWidthIdx = gp_sizeIdxInfo->idxFrom( blockWidth ); const SizeType blkHeightIdx = gp_sizeIdxInfo->idxFrom( blockHeight ); - const ScanElement * scanId2RP = g_scanOrder[ch][SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx]; - NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd][ch]; - NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd][ch]; + const ScanElement * scanId2RP = g_scanOrder[SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx]; + NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd]; + NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd]; // consider only non-zero-out region const uint32_t blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth ); const uint32_t blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight ); @@ -289,10 +291,12 @@ namespace DQIntern nbOut.maxDist -= scanId; } - m_tuParameters[hd][vd][ch] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(ch) ); + for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ ) + { + m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) ); + } } } - } m_scansInitialized = true; } @@ -306,19 +310,19 @@ namespace DQIntern { for( int vd = 0; vd <= MAX_CU_DEPTH; vd++ ) { - for( int ch = 0; ch < 2; ch++ ) + NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd]; + NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd]; + if( sId2NbSbb ) { - 
NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd][ch]; - NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd][ch]; - TUParameters*& tuPars = m_tuParameters [hd][vd][ch]; - if( sId2NbSbb ) - { - delete [] sId2NbSbb; - } - if( sId2NbOut ) - { - delete [] sId2NbOut; - } + delete [] sId2NbSbb; + } + if( sId2NbOut ) + { + delete [] sId2NbOut; + } + for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ ) + { + TUParameters*& tuPars = m_tuParameters[hd][vd][chId]; if( tuPars ) { delete tuPars; @@ -341,30 +345,25 @@ namespace DQIntern const uint32_t nonzeroWidth = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_width); const uint32_t nonzeroHeight = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_height); m_numCoeff = nonzeroWidth * nonzeroHeight; - m_log2SbbWidth = g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][0]; - m_log2SbbHeight = g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][1]; + const int log2W = floorLog2( m_width ); + const int log2H = floorLog2( m_height ); + m_log2SbbWidth = g_log2SbbSize[ log2W ][ log2H ][0]; + m_log2SbbHeight = g_log2SbbSize[ log2W ][ log2H ][1]; m_log2SbbSize = m_log2SbbWidth + m_log2SbbHeight; m_sbbSize = ( 1 << m_log2SbbSize ); m_sbbMask = m_sbbSize - 1; m_widthInSbb = nonzeroWidth >> m_log2SbbWidth; m_heightInSbb = nonzeroHeight >> m_log2SbbHeight; m_numSbb = m_widthInSbb * m_heightInSbb; -#if HEVC_USE_MDCS -#error "MDCS is not supported" // use different function... 
- // m_scanType = CoeffScanType( TU::getCoefScanIdx( tu, m_compID ) ); -#else m_scanType = SCAN_DIAG; -#endif SizeType hsbb = gp_sizeIdxInfo->idxFrom( m_widthInSbb ); SizeType vsbb = gp_sizeIdxInfo->idxFrom( m_heightInSbb ); SizeType hsId = gp_sizeIdxInfo->idxFrom( m_width ); SizeType vsId = gp_sizeIdxInfo->idxFrom( m_height ); - m_scanSbbId2SbbPos = g_scanOrder [ chType ][ SCAN_UNGROUPED ][ m_scanType ][ hsbb ][ vsbb ]; - m_scanId2BlkPos = g_scanOrder [ chType ][ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ]; - int log2W = g_aucLog2[ m_width ]; - int log2H = g_aucLog2[ m_height ]; - m_scanId2NbInfoSbb = rom.getNbInfoSbb( log2W, log2H, chType ); - m_scanId2NbInfoOut = rom.getNbInfoOut( log2W, log2H, chType ); + m_scanSbbId2SbbPos = g_scanOrder [ SCAN_UNGROUPED ][ m_scanType ][ hsbb ][ vsbb ]; + m_scanId2BlkPos = g_scanOrder [ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ]; + m_scanId2NbInfoSbb = rom.getNbInfoSbb( log2W, log2H ); + m_scanId2NbInfoOut = rom.getNbInfoOut( log2W, log2H ); m_scanInfo = new ScanInfo[ m_numCoeff ]; for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ ) { @@ -375,6 +374,9 @@ namespace DQIntern void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx ) { + scanInfo.chType = m_chType; + scanInfo.tuWidth = m_width; + scanInfo.tuHeight = m_height; scanInfo.sbbSize = m_sbbSize; scanInfo.numSbb = m_numSbb; scanInfo.scanIdx = scanIdx; @@ -395,12 +397,12 @@ namespace DQIntern const int diag = m_scanId2BlkPos[nextScanIdx].x + m_scanId2BlkPos[nextScanIdx].y; if( m_chType == CHANNEL_TYPE_LUMA ) { - scanInfo.sigCtxOffsetNext = ( diag < 2 ? 12 : diag < 5 ? 6 : 0 ); + scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ? 4 : 0 ); scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 ); } else { - scanInfo.sigCtxOffsetNext = ( diag < 2 ? 6 : 0 ); + scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 ); scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 
6 : 1 ); } scanInfo.nextInsidePos = nextScanIdx & m_sbbMask; @@ -446,7 +448,7 @@ namespace DQIntern static const unsigned sm_numCtxSetsSig = 3; static const unsigned sm_numCtxSetsGtx = 2; static const unsigned sm_maxNumSigSbbCtx = 2; - static const unsigned sm_maxNumSigCtx = 18; + static const unsigned sm_maxNumSigCtx = 12; static const unsigned sm_maxNumGtxCtx = 21; private: @@ -486,7 +488,7 @@ namespace DQIntern { bool rootCbfSoFar = false; bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID); - uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> g_aucLog2[tu.lheight()] : tu.cu->lwidth() >> g_aucLog2[tu.lwidth()]; + uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth()); if( isLastSubPartition ) { TransformUnit* tuPointer = tu.cu->firstTU; @@ -504,11 +506,11 @@ namespace DQIntern { prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth); } - bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.depth, prevLumaCbf, true))); + bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true))); } else { - bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.depth, tu.cbf[COMPONENT_Cb]))); + bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMPONENT_Cb]))); } cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]); } @@ -520,12 +522,8 @@ namespace DQIntern int32_t bitOffset = ( xy ? cbfDeltaBits : 0 ); int32_t* lastBits = ( xy ? m_lastBitsY : m_lastBitsX ); const unsigned size = ( xy ? tuPars.m_height : tuPars.m_width ); - const unsigned log2Size = g_aucNextLog2[ size ]; -#if HEVC_USE_MDCS - const bool useYCtx = ( m_scanType == SCAN_VER ? 
( xy == 0 ) : ( xy != 0 ) ); -#else + const unsigned log2Size = ceilLog2( size ); const bool useYCtx = ( xy != 0 ); -#endif const CtxSet& ctxSetLast = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ]; const unsigned lastShift = ( compID == COMPONENT_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) ); const unsigned lastOffset = ( compID == COMPONENT_Y ? ( prefixCtx[log2Size] ) : 0 ); @@ -560,7 +558,7 @@ namespace DQIntern { BinFracBits* bits = m_sigFracBits [ ctxSetId ]; const CtxSet& ctxSet = Ctx::SigFlag [ chType + 2*ctxSetId ]; - const unsigned numCtx = ( chType == CHANNEL_TYPE_LUMA ? 18 : 12 ); + const unsigned numCtx = ( chType == CHANNEL_TYPE_LUMA ? 12 : 8 ); for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ ) { bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) ); @@ -629,14 +627,13 @@ namespace DQIntern { public: Quantizer() {} - - void dequantBlock ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff ) const; - void initQuantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda ); - - inline void preQuantCoeff(const TCoeff absCoeff, PQData *pqData) const; + void dequantBlock ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef ) const; + void initQuantBlock ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue ); + inline void preQuantCoeff( const TCoeff absCoeff, PQData *pqData, int quanCoeff ) const; inline TCoeff getLastThreshold() const { return m_thresLast; } inline TCoeff getSSbbThreshold() const { return m_thresSSbb; } + inline int64_t getQScale() const { return m_QScale; } private: // quantization int m_QShift; @@ -666,15 +663,11 @@ namespace DQIntern } return y; } - - void Quantizer::initQuantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda ) + void 
Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue = -1) { -#if HEVC_USE_SCALING_LISTS - CHECK ( tu.cs->sps->getScalingListFlag(), "Scaling lists not supported" ); -#endif CHECKD( lambda <= 0.0, "Lambda must be greater than 0" ); - const int qpDQ = cQP.Qp + 1; + const int qpDQ = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1; const int qpPer = qpDQ / 6; const int qpRem = qpDQ - 6 * qpPer; const SPS& sps = *tu.cs->sps; @@ -683,33 +676,22 @@ namespace DQIntern const int channelBitDepth = sps.getBitDepth( chType ); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange( chType ); const int nomTransformShift = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange ); - const bool clipTransformShift = ( tu.mtsIdx==1 && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); - const int transformShift = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ); - + const bool clipTransformShift = ( tu.mtsIdx[compID] == MTS_SKIP && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag()); + const bool needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID); + const int transformShift = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ) + (needsSqrt2ScaleAdjustment?-1:0); // quant parameters m_QShift = QUANT_SHIFT - 1 + qpPer + transformShift; m_QAdd = -( ( 3 << m_QShift ) >> 1 ); -#if HM_QTBT_AS_IN_JEM_QUANT - Intermediate_Int invShift = IQUANT_SHIFT + 1 - qpPer - transformShift + ( TU::needsBlockSizeTrafoScale( tu, compID ) ? ADJ_DEQUANT_SHIFT : 0 ); - m_QScale = ( TU::needsSqrt2Scale( tu, compID ) ? 
( g_quantScales[ qpRem ] * 181 ) >> 7 : g_quantScales[ qpRem ] ); -#else Intermediate_Int invShift = IQUANT_SHIFT + 1 - qpPer - transformShift; - m_QScale = g_quantScales [ qpRem ]; -#endif + m_QScale = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ]; const unsigned qIdxBD = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 ); m_maxQIdx = ( 1 << (qIdxBD-1) ) - 4; - m_thresLast = TCoeff( ( int64_t(3) << m_QShift ) / ( 4 * m_QScale ) ); - m_thresSSbb = TCoeff( ( int64_t(3) << m_QShift ) / ( 4 * m_QScale ) ); - + m_thresLast = TCoeff((int64_t(4) << m_QShift)); + m_thresSSbb = TCoeff((int64_t(3) << m_QShift)); // distortion calculation parameters - const int64_t qScale = g_quantScales[ qpRem ]; -#if HM_QTBT_AS_IN_JEM_QUANT + const int64_t qScale = (gValue==-1) ? m_QScale : gValue; const int nomDShift = - SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift; -#else - const int nomDShift = SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) - + m_QShift + (TU::needsQP3Offset(tu, compID) ? 1 : 0); -#endif + SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0); const double qScale2 = double( qScale * qScale ); const double nomDistFactor = ( nomDShift < 0 ? 
1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) ); const int64_t pow2dfShift = (int64_t)( nomDistFactor * qScale2 ) + 1; @@ -720,23 +702,16 @@ namespace DQIntern m_DistOrgFact = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1 )) + .5 ); } - void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff ) const + void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const { -#if HEVC_USE_SCALING_LISTS - CHECK ( tu.cs->sps->getScalingListFlag(), "Scaling lists not supported" ); -#endif //----- set basic parameters ----- const CompArea& area = tu.blocks[ compID ]; const int numCoeff = area.area(); const SizeType hsId = gp_sizeIdxInfo->idxFrom( area.width ); const SizeType vsId = gp_sizeIdxInfo->idxFrom( area.height ); -#if HEVC_USE_MDCS - const CoeffScanType scanType = CoeffScanType( TU::getCoefScanIdx( tu, compID ) ); -#else const CoeffScanType scanType = SCAN_DIAG; -#endif - const ScanElement *scan = g_scanOrder[toChannelType(compID)][SCAN_GROUPED_4x4][scanType][hsId][vsId]; + const ScanElement *scan = g_scanOrder[SCAN_GROUPED_4x4][scanType][hsId][vsId]; const TCoeff* qCoeff = tu.getCoeffs( compID ).buf; TCoeff* tCoeff = recCoeff.buf; @@ -757,7 +732,7 @@ namespace DQIntern } //----- set dequant parameters ----- - const int qpDQ = cQP.Qp + 1; + const int qpDQ = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1; const int qpPer = qpDQ / 6; const int qpRem = qpDQ - 6 * qpPer; const SPS& sps = *tu.cs->sps; @@ -767,22 +742,12 @@ namespace DQIntern const TCoeff minTCoeff = -( 1 << maxLog2TrDynamicRange ); const TCoeff maxTCoeff = ( 1 << maxLog2TrDynamicRange ) - 1; const int nomTransformShift = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange ); - const bool clipTransformShift = ( tu.mtsIdx==1 && 
sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ); - const int transformShift = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ); -#if HM_QTBT_AS_IN_JEM_QUANT - Intermediate_Int shift = IQUANT_SHIFT + 1 - qpPer - transformShift + ( TU::needsBlockSizeTrafoScale( tu, compID ) ? ADJ_DEQUANT_SHIFT : 0 ); - Intermediate_Int invQScale = g_invQuantScales[ qpRem ] * ( TU::needsSqrt2Scale( tu, compID ) ? 181 : 1 ); -#else - Intermediate_Int shift = IQUANT_SHIFT + 1 - qpPer - transformShift; - Intermediate_Int invQScale = g_invQuantScales[ qpRem ]; -#endif - if( shift < 0 ) - { - invQScale <<= -shift; - shift = 0; - } - Intermediate_Int add = ( 1 << shift ) >> 1; - + const bool clipTransformShift = ( tu.mtsIdx[compID] == MTS_SKIP && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag()); + const bool needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID); + const int transformShift = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ) + (needsSqrt2ScaleAdjustment?-1:0); + Intermediate_Int shift = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); + Intermediate_Int invQScale = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ]; + Intermediate_Int add = (shift < 0) ? 0 : ((1 << shift) >> 1); //----- dequant coefficients ----- for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- ) { @@ -790,17 +755,23 @@ namespace DQIntern const TCoeff& level = qCoeff[ rasterPos ]; if( level ) { + if (enableScalingLists) + invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale + if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx)) + { + invQScale <<= -shift; + } Intermediate_Int qIdx = ( level << 1 ) + ( level > 0 ? 
-(state>>1) : (state>>1) ); - Intermediate_Int nomTCoeff = ( qIdx * invQScale + add ) >> shift; - tCoeff[ rasterPos ] = (TCoeff)Clip3<Intermediate_Int>( minTCoeff, maxTCoeff, nomTCoeff ); + int64_t nomTCoeff = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift); + tCoeff[rasterPos] = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff); } state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3; // the 16-bit value "32040" represent the state transition table } } - inline void Quantizer::preQuantCoeff(const TCoeff absCoeff, PQData *pqData) const + inline void Quantizer::preQuantCoeff(const TCoeff absCoeff, PQData *pqData, int quanCoeff) const { - int64_t scaledOrg = int64_t( absCoeff ) * m_QScale; + int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff; TCoeff qIdx = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift ) ) ); int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact; PQData& pq_a = pqData[ qIdx & 3 ]; @@ -875,10 +846,10 @@ namespace DQIntern #define RICEMAX 32 const int32_t g_goRiceBits[4][RICEMAX] = { - { 32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752}, - { 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984}, - { 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680}, - { 131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 
163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376} + { 32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752}, + { 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984}, + { 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680}, + {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376} }; class State @@ -903,77 +874,76 @@ namespace DQIntern m_goRicePar = 0; m_goRiceZero = 0; } - - void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB) const + void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB ) const { const int32_t* goRiceTab = g_goRiceBits[m_goRicePar]; int64_t rdCostA = m_rdCost + pqDataA.deltaDist; int64_t rdCostB = m_rdCost + pqDataB.deltaDist; int64_t rdCostZ = m_rdCost; - if( m_remRegBins >= 4 ) - { - if( pqDataA.absLevel < 4 ) - rdCostA += m_coeffFracBits.bits[pqDataA.absLevel]; - else + if( m_remRegBins >= 4 ) { - const unsigned value = (pqDataA.absLevel - 4) >> 1; - rdCostA += m_coeffFracBits.bits[pqDataA.absLevel - (value << 1)] + 
goRiceTab[value<RICEMAX ? value : RICEMAX-1]; + if( pqDataA.absLevel < 4 ) + rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel ]; + else + { + const unsigned value = ( pqDataA.absLevel - 4 ) >> 1; + rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ]; + } + if( pqDataB.absLevel < 4 ) + rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel ]; + else + { + const unsigned value = ( pqDataB.absLevel - 4 ) >> 1; + rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ]; + } + if( spt == SCAN_ISCSBB ) + { + rdCostA += m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else if( spt == SCAN_SOCSBB ) + { + rdCostA += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 0 ]; + } + else if( m_numSigSbb ) + { + rdCostA += m_sigFracBits.intBits[ 1 ]; + rdCostB += m_sigFracBits.intBits[ 1 ]; + rdCostZ += m_sigFracBits.intBits[ 0 ]; + } + else + { + rdCostZ = decisionA.rdCost; + } } - if( pqDataB.absLevel < 4 ) - rdCostB += m_coeffFracBits.bits[pqDataB.absLevel]; else { - const unsigned value = (pqDataB.absLevel - 4) >> 1; - rdCostB += m_coeffFracBits.bits[pqDataB.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1]; + rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : ( pqDataA.absLevel < RICEMAX ? pqDataA.absLevel : RICEMAX - 1 ) ]; + rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : ( pqDataB.absLevel < RICEMAX ? 
pqDataB.absLevel : RICEMAX - 1 ) ]; + rdCostZ += goRiceTab[ m_goRiceZero ]; } - if( spt == SCAN_ISCSBB ) + if( rdCostA < decisionA.rdCost ) { - rdCostA += m_sigFracBits.intBits[1]; - rdCostB += m_sigFracBits.intBits[1]; - rdCostZ += m_sigFracBits.intBits[0]; + decisionA.rdCost = rdCostA; + decisionA.absLevel = pqDataA.absLevel; + decisionA.prevId = m_stateId; } - else if( spt == SCAN_SOCSBB ) + if( rdCostZ < decisionA.rdCost ) { - rdCostA += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; - rdCostB += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; - rdCostZ += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[0]; + decisionA.rdCost = rdCostZ; + decisionA.absLevel = 0; + decisionA.prevId = m_stateId; } - else if( m_numSigSbb ) - { - rdCostA += m_sigFracBits.intBits[1]; - rdCostB += m_sigFracBits.intBits[1]; - rdCostZ += m_sigFracBits.intBits[0]; - } - else + if( rdCostB < decisionB.rdCost ) { - rdCostZ = decisionA.rdCost; + decisionB.rdCost = rdCostB; + decisionB.absLevel = pqDataB.absLevel; + decisionB.prevId = m_stateId; } } - else - { - rdCostA += (1 << SCALE_BITS) + goRiceTab[pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : (pqDataA.absLevel<RICEMAX ? pqDataA.absLevel : RICEMAX-1)]; - rdCostB += (1 << SCALE_BITS) + goRiceTab[pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : (pqDataB.absLevel<RICEMAX ? 
pqDataB.absLevel : RICEMAX-1)]; - rdCostZ += goRiceTab[m_goRiceZero]; - } - if( rdCostA < decisionA.rdCost ) - { - decisionA.rdCost = rdCostA; - decisionA.absLevel = pqDataA.absLevel; - decisionA.prevId = m_stateId; - } - if( rdCostZ < decisionA.rdCost ) - { - decisionA.rdCost = rdCostZ; - decisionA.absLevel = 0; - decisionA.prevId = m_stateId; - } - if( rdCostB < decisionB.rdCost ) - { - decisionB.rdCost = rdCostB; - decisionB.absLevel = pqDataB.absLevel; - decisionB.prevId = m_stateId; - } - } inline void checkRdCostStart(int32_t lastOffset, const PQData &pqData, Decision &decision) const { @@ -1018,7 +988,7 @@ namespace DQIntern int64_t m_rdCost; uint16_t m_absLevelsAndCtxInit[24]; // 16x8bit for abs levels + 16x16bit for ctx init id int8_t m_numSigSbb; - int8_t m_remRegBins; + int m_remRegBins; int8_t m_refSbbCtxId; BinFracBits m_sbbFracBits; BinFracBits m_sigFracBits; @@ -1028,8 +998,10 @@ namespace DQIntern const int8_t m_stateId; const BinFracBits*const m_sigFracBitsArray; const CoeffFracBits*const m_gtxFracBitsArray; - const uint32_t*const m_goRiceZeroArray; CommonCtx& m_commonCtx; + public: + unsigned effWidth; + unsigned effHeight; }; @@ -1038,7 +1010,6 @@ namespace DQIntern , m_stateId ( stateId ) , m_sigFracBitsArray( rateEst.sigFlagBits(stateId) ) , m_gtxFracBitsArray( rateEst.gtxFracBits(stateId) ) - , m_goRiceZeroArray ( g_auiGoRicePosCoeff0[std::max(0,stateId-1)] ) , m_commonCtx ( commonCtx ) { } @@ -1059,11 +1030,6 @@ namespace DQIntern m_goRicePar = prvState->m_goRicePar; if( m_remRegBins >= 4 ) { - TCoeff rem = (decision.absLevel - 4) >> 1; - if( m_goRicePar < 3 && rem > (3<<m_goRicePar)-1 ) - { - m_goRicePar++; - } m_remRegBins -= (decision.absLevel < 2 ? 
decision.absLevel : 3); } ::memcpy( m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48*sizeof(uint8_t) ); @@ -1072,15 +1038,8 @@ namespace DQIntern { m_numSigSbb = 1; m_refSbbCtxId = -1; - if ( scanInfo.sbbSize == 4 ) - { - m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3); - } - else - { - m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3); - } - m_goRicePar = ( ((decision.absLevel - 4) >> 1) > (3<<0)-1 ? 1 : 0 ); + int ctxBinSampleRatio = (scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + m_remRegBins = (effWidth * effHeight *ctxBinSampleRatio) / 16 - (decision.absLevel < 2 ? decision.absLevel : 3); ::memset( m_absLevelsAndCtxInit, 0, 48*sizeof(uint8_t) ); } @@ -1125,8 +1084,44 @@ namespace DQIntern } #undef UPDATE TCoeff sumGt1 = sumAbs1 - sumNum; - m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + (sumAbs1 < 5 ? sumAbs1 : 5)]; + m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 )]; m_coeffFracBits = m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? 
sumGt1 : 4)]; + + TCoeff sumAbs = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8; +#define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs+=t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + int sumAll = std::max(std::min(31, (int)sumAbs - 4 * 5), 0); + m_goRicePar = g_auiGoRiceParsCoeff[sumAll]; } else { @@ -1165,7 +1160,7 @@ namespace DQIntern #undef UPDATE sumAbs = std::min<TCoeff>(31, sumAbs); m_goRicePar = g_auiGoRiceParsCoeff[sumAbs]; - m_goRiceZero = m_goRiceZeroArray[sumAbs]; + m_goRiceZero = g_auiGoRicePosCoeff0(m_stateId, m_goRicePar); } } } @@ -1203,7 +1198,7 @@ namespace DQIntern TCoeff sumNum = tinit & 7; TCoeff sumAbs1 = ( tinit >> 3 ) & 31; TCoeff sumGt1 = sumAbs1 - sumNum; - m_sigFracBits = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + ( sumAbs1 < 5 ? sumAbs1 : 5 ) ]; + m_sigFracBits = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 ) ]; m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1 < 4 ? sumGt1 : 4 ) ]; } } @@ -1228,13 +1223,14 @@ namespace DQIntern const int sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[ scanInfo.nextSbbRight ] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[ scanInfo.nextSbbBelow ] : false ) ? 1 : 0 ); currState.m_numSigSbb = 0; - if (scanInfo.sbbSize == 4) + if (prevState) { - currState.m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK; + currState.m_remRegBins = prevState->m_remRegBins; } else { - currState.m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK; + int ctxBinSampleRatio = (scanInfo.chType == CHANNEL_TYPE_LUMA) ? 
MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + currState.m_remRegBins = (currState.effWidth * currState.effHeight *ctxBinSampleRatio) / 16; } currState.m_goRicePar = 0; currState.m_refSbbCtxId = currState.m_stateId; @@ -1291,12 +1287,12 @@ namespace DQIntern public: DepQuant(); - void quant ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum ); - void dequant ( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP ) const; + void quant ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff ); + void dequant ( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* quantCoeff ); private: - void xDecideAndUpdate ( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut ); - void xDecide ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut ); + void xDecideAndUpdate ( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, int quantCoeff); + void xDecide ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, int quantCoeff ); private: CommonCtx m_commonCtx; @@ -1323,9 +1319,9 @@ namespace DQIntern #undef TINIT - void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP ) const + void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef ) { - m_quant.dequantBlock( tu, compID, cQP, recCoeff ); + m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef ); } @@ -1334,7 +1330,7 @@ namespace DQIntern #undef DINIT - 
void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut) + void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, int quanCoeff) { ::memcpy( decisions, startDec, 8*sizeof(Decision) ); @@ -1351,29 +1347,30 @@ namespace DQIntern } PQData pqData[4]; - m_quant.preQuantCoeff( absCoeff, pqData ); + m_quant.preQuantCoeff( absCoeff, pqData, quanCoeff ); m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2]); m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0]); m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3]); m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1]); if( spt==SCAN_EOCSBB ) { - m_skipStates[0].checkRdCostSkipSbb( decisions[0] ); - m_skipStates[1].checkRdCostSkipSbb( decisions[1] ); - m_skipStates[2].checkRdCostSkipSbb( decisions[2] ); - m_skipStates[3].checkRdCostSkipSbb( decisions[3] ); + m_skipStates[0].checkRdCostSkipSbb( decisions[0] ); + m_skipStates[1].checkRdCostSkipSbb( decisions[1] ); + m_skipStates[2].checkRdCostSkipSbb( decisions[2] ); + m_skipStates[3].checkRdCostSkipSbb( decisions[3] ); } + m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] ); m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] ); } - void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut ) + void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, int quantCoeff ) { Decision* decisions = m_trellis[ scanInfo.scanIdx ]; std::swap( m_prevStates, m_currStates ); - xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut ); + xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut, quantCoeff ); if( scanInfo.scanIdx ) { @@ -1436,7 +1433,7 @@ namespace DQIntern 
} - void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum ) + void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff ) { CHECKD( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(), "ext precision is not supported" ); @@ -1449,12 +1446,43 @@ namespace DQIntern ::memset( tu.getCoeffs( compID ).buf, 0x00, numCoeff*sizeof(TCoeff) ); absSum = 0; + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + //===== scaling matrix ==== + //const int qpDQ = cQP.Qp + 1; + //const int qpPer = qpDQ / 6; + //const int qpRem = qpDQ - 6 * qpPer; + + //TCoeff thresTmp = thres; + bool zeroOut = false; + bool zeroOutforThres = false; + int effWidth = tuPars.m_width, effHeight = tuPars.m_height; + if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32)) && compID == COMPONENT_Y) + { + effHeight = (tuPars.m_height == 32) ? 16 : tuPars.m_height; + effWidth = (tuPars.m_width == 32) ? 16 : tuPars.m_width; + zeroOut = (effHeight < tuPars.m_height || effWidth < tuPars.m_width); + } + zeroOutforThres = zeroOut || (32 < tuPars.m_height || 32 < tuPars.m_width); //===== find first test position ===== - int firstTestPos = numCoeff - 1; + int firstTestPos = numCoeff - 1; + if (lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4) + { + firstTestPos = ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) ) ? 
7 : 15 ; + } + const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale(); const TCoeff thres = m_quant.getLastThreshold(); for( ; firstTestPos >= 0; firstTestPos-- ) { - if (abs(tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]) > thres) + if (zeroOutforThres && (tuPars.m_scanId2BlkPos[firstTestPos].x >= ((tuPars.m_width == 32 && zeroOut) ? 16 : 32) + || tuPars.m_scanId2BlkPos[firstTestPos].y >= ((tuPars.m_height == 32 && zeroOut) ? 16 : 32))) + continue; + TCoeff thresTmp = (enableScalingLists) ? TCoeff(thres / (4 * quantCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx])) + : TCoeff(thres / (4 * defaultQuantisationCoefficient)); + + if (abs(tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]) > thresTmp) { break; } @@ -1473,20 +1501,28 @@ namespace DQIntern } m_startState.init(); - int effWidth = tuPars.m_width, effHeight = tuPars.m_height; - bool zeroOut = false; - if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) + + int effectWidth = std::min(32, effWidth); + int effectHeight = std::min(32, effHeight); + for (int k = 0; k < 12; k++) { - effHeight = ( tuPars.m_height == 32 ) ? 16 : tuPars.m_height; - effWidth = ( tuPars.m_width == 32 ) ? 
16 : tuPars.m_width; - zeroOut = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width ); + m_allStates[k].effWidth = effectWidth; + m_allStates[k].effHeight = effectHeight; } + m_startState.effWidth = effectWidth; + m_startState.effHeight = effectHeight; //===== populate trellis ===== for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- ) { const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ]; - xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ) ); + if (enableScalingLists) + { + m_quant.initQuantBlock(tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos]); + xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), quantCoeff[scanInfo.rasterPos] ); + } + else + xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), defaultQuantisationCoefficient ); } //===== find best path ===== @@ -1537,9 +1573,22 @@ DepQuant::~DepQuant() void DepQuant::quant( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx ) { - if( tu.cs->slice->getDepQuantEnabledFlag() ) + if ( tu.cs->picHeader->getDepQuantEnabledFlag() && (tu.mtsIdx[compID] != MTS_SKIP) ) { - static_cast<DQIntern::DepQuant*>(p)->quant( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum ); + //===== scaling matrix ==== + const int qpDQ = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1; + const int qpPer = qpDQ / 6; + const int qpRem = qpDQ - 6 * qpPer; + const CompArea &rect = tu.blocks[compID]; + const int width = rect.width; + const int height = rect.height; + uint32_t scalingListType = getScalingListType(tu.cu->predMode, compID); + CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); + const uint32_t log2TrWidth = floorLog2(width); + const uint32_t log2TrHeight = floorLog2(height); + 
const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), tu.cu->lfnstIdx > 0, disableSMForLFNST); + static_cast<DQIntern::DepQuant*>(p)->quant( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists, Quant::getQuantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) ); } else { @@ -1549,9 +1598,21 @@ void DepQuant::quant( TransformUnit &tu, const ComponentID &compID, const CCoeff void DepQuant::dequant( const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, const QpParam &cQP ) { - if( tu.cs->slice->getDepQuantEnabledFlag() ) + if( tu.cs->picHeader->getDepQuantEnabledFlag() && (tu.mtsIdx[compID] != MTS_SKIP)) { - static_cast<DQIntern::DepQuant*>(p)->dequant( tu, dstCoeff, compID, cQP ); + const int qpDQ = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1; + const int qpPer = qpDQ / 6; + const int qpRem = qpDQ - 6 * qpPer; + const CompArea &rect = tu.blocks[compID]; + const int width = rect.width; + const int height = rect.height; + uint32_t scalingListType = getScalingListType(tu.cu->predMode, compID); + CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); + const uint32_t log2TrWidth = floorLog2(width); + const uint32_t log2TrHeight = floorLog2(height); + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? 
tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), tu.cu->lfnstIdx > 0, disableSMForLFNST); + static_cast<DQIntern::DepQuant*>(p)->dequant( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) ); } else { diff --git a/source/Lib/CommonLib/DepQuant.h b/source/Lib/CommonLib/DepQuant.h index 5a26b46da88ee3a424d62cd500ecd91325fe36d6..eb2685a2e02adfb9277abb74d81e36b0a77f0fed 100644 --- a/source/Lib/CommonLib/DepQuant.h +++ b/source/Lib/CommonLib/DepQuant.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/HRD.cpp b/source/Lib/CommonLib/HRD.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e21e5178d58f02b2c26c5defc5052423483e79e2 --- /dev/null +++ b/source/Lib/CommonLib/HRD.cpp @@ -0,0 +1,34 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. 
+* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + + diff --git a/source/Lib/CommonLib/HRD.h b/source/Lib/CommonLib/HRD.h new file mode 100644 index 0000000000000000000000000000000000000000..b236a10b74d690b6f1a7f9bd897328f1c6055fee --- /dev/null +++ b/source/Lib/CommonLib/HRD.h @@ -0,0 +1,189 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#ifndef __HRD__ +#define __HRD__ + +#include "Common.h" +#include "SEI.h" + +class TimingInfo +{ +protected: + bool m_timingInfoPresentFlag; + uint32_t m_numUnitsInTick; + uint32_t m_timeScale; + int m_numTicksPocDiffOneMinus1; + +public: + TimingInfo() + : m_timingInfoPresentFlag (false) + , m_numUnitsInTick (1001) + , m_timeScale (60000) + , m_numTicksPocDiffOneMinus1 (0) + {} + + void setTimingInfoPresentFlag( bool flag ) { m_timingInfoPresentFlag = flag; } + bool getTimingInfoPresentFlag( ) const { return m_timingInfoPresentFlag; } + + void setNumUnitsInTick( uint32_t value ) { m_numUnitsInTick = value; } + uint32_t getNumUnitsInTick( ) const { return m_numUnitsInTick; } + void setTimeScale( uint32_t value ) { m_timeScale = value; } + uint32_t getTimeScale( ) const { return m_timeScale; } + + void setNumTicksPocDiffOneMinus1(int x) { m_numTicksPocDiffOneMinus1 = x; } + int getNumTicksPocDiffOneMinus1( ) const { return m_numTicksPocDiffOneMinus1; } +}; + +struct HrdSubLayerInfo +{ + bool fixedPicRateFlag; + bool fixedPicRateWithinCvsFlag; + uint32_t picDurationInTcMinus1; + bool lowDelayHrdFlag; + uint32_t cpbCntMinus1; + uint32_t bitRateValueMinus1[MAX_CPB_CNT][2]; + uint32_t cpbSizeValue [MAX_CPB_CNT][2]; + uint32_t ducpbSizeValue [MAX_CPB_CNT][2]; + bool cbrFlag [MAX_CPB_CNT][2]; + uint32_t duBitRateValue [MAX_CPB_CNT][2]; +}; + +class HRDParameters +{ +private: + bool m_nalHrdParametersPresentFlag; + bool m_vclHrdParametersPresentFlag; + uint32_t m_tickDivisorMinus2; + bool m_generalDecodingUnitHrdParamsPresentFlag; + uint32_t m_bitRateScale; + uint32_t m_cpbSizeScale; + uint32_t m_cpbSizeDuScale; + HrdSubLayerInfo m_HRD[MAX_TLAYER]; + +public: + HRDParameters() + :m_nalHrdParametersPresentFlag (false) + ,m_vclHrdParametersPresentFlag (false) + ,m_tickDivisorMinus2 (0) + ,m_generalDecodingUnitHrdParamsPresentFlag (false) + ,m_bitRateScale (0) + ,m_cpbSizeScale (0) + ,m_cpbSizeDuScale (0) + {} + + virtual ~HRDParameters() {} + + void 
setNalHrdParametersPresentFlag( bool flag ) { m_nalHrdParametersPresentFlag = flag; } + bool getNalHrdParametersPresentFlag( ) const { return m_nalHrdParametersPresentFlag; } + + void setVclHrdParametersPresentFlag( bool flag ) { m_vclHrdParametersPresentFlag = flag; } + bool getVclHrdParametersPresentFlag( ) const { return m_vclHrdParametersPresentFlag; } + + + void setTickDivisorMinus2( uint32_t value ) { m_tickDivisorMinus2 = value; } + uint32_t getTickDivisorMinus2( ) const { return m_tickDivisorMinus2; } + + + void setGeneralDecodingUnitHrdParamsPresentFlag( bool flag) { m_generalDecodingUnitHrdParamsPresentFlag = flag; } + bool getGeneralDecodingUnitHrdParamsPresentFlag( ) const { return m_generalDecodingUnitHrdParamsPresentFlag; } + + void setBitRateScale( uint32_t value ) { m_bitRateScale = value; } + uint32_t getBitRateScale( ) const { return m_bitRateScale; } + + void setCpbSizeScale( uint32_t value ) { m_cpbSizeScale = value; } + uint32_t getCpbSizeScale( ) const { return m_cpbSizeScale; } + void setCpbSizeDuScale( uint32_t value ) { m_cpbSizeDuScale = value; } + uint32_t getCpbSizeDuScale( ) const { return m_cpbSizeDuScale; } + + + void setFixedPicRateFlag( int layer, bool flag ) { m_HRD[layer].fixedPicRateFlag = flag; } + bool getFixedPicRateFlag( int layer ) const { return m_HRD[layer].fixedPicRateFlag; } + + void setFixedPicRateWithinCvsFlag( int layer, bool flag ) { m_HRD[layer].fixedPicRateWithinCvsFlag = flag; } + bool getFixedPicRateWithinCvsFlag( int layer ) const { return m_HRD[layer].fixedPicRateWithinCvsFlag; } + + void setPicDurationInTcMinus1( int layer, uint32_t value ) { m_HRD[layer].picDurationInTcMinus1 = value; } + uint32_t getPicDurationInTcMinus1( int layer ) const { return m_HRD[layer].picDurationInTcMinus1; } + + void setLowDelayHrdFlag( int layer, bool flag ) { m_HRD[layer].lowDelayHrdFlag = flag; } + bool getLowDelayHrdFlag( int layer ) const { return m_HRD[layer].lowDelayHrdFlag; } + + void setCpbCntMinus1( int layer, uint32_t 
value ) { m_HRD[layer].cpbCntMinus1 = value; } + uint32_t getCpbCntMinus1( int layer ) const { return m_HRD[layer].cpbCntMinus1; } + + void setBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl] = value; } + uint32_t getBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl]; } + + void setCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl] = value; } + uint32_t getCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl]; } + void setDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl] = value; } + uint32_t getDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl]; } + void setDuBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl] = value; } + uint32_t getDuBitRateValueMinus1(int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl]; } + void setCbrFlag( int layer, int cpbcnt, int nalOrVcl, bool value ) { m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl] = value; } + bool getCbrFlag( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl]; } + + bool getCpbDpbDelaysPresentFlag( ) const { return getNalHrdParametersPresentFlag() || getVclHrdParametersPresentFlag(); } +}; + +class HRD +{ +public: + HRD() + :m_bufferingPeriodInitialized (false) + {}; + + virtual ~HRD() + {}; + + void setHRDParameters(HRDParameters &hrdParam) { m_hrdParams=hrdParam; } + HRDParameters getHRDParameters() const { return m_hrdParams; } + const HRDParameters& getHRDParameters() { return m_hrdParams; } + + void setTimingInfo(TimingInfo &timingInfo) { 
m_timingInfo=timingInfo; } + TimingInfo getTimingInfo() const { return m_timingInfo; } + const TimingInfo& getTimingInfo() { return m_timingInfo; } + + void setBufferingPeriodSEI(const SEIBufferingPeriod* bp) { bp->copyTo(m_bufferingPeriodSEI); m_bufferingPeriodInitialized = true; } + const SEIBufferingPeriod* getBufferingPeriodSEI() const { return m_bufferingPeriodInitialized ? &m_bufferingPeriodSEI : nullptr; } + +protected: + HRDParameters m_hrdParams; + TimingInfo m_timingInfo; + bool m_bufferingPeriodInitialized; + SEIBufferingPeriod m_bufferingPeriodSEI; +}; + +#endif //__HRD__ diff --git a/source/Lib/CommonLib/Hash.cpp b/source/Lib/CommonLib/Hash.cpp index 2301f6845b148de9e435cb6f72cbd4dbcb0fddfc..5657f3cb3fd66b4aa877f0a03eba16ac9afd4a32 100644 --- a/source/Lib/CommonLib/Hash.cpp +++ b/source/Lib/CommonLib/Hash.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -109,6 +109,10 @@ TComHash::TComHash() { m_lookupTable = NULL; tableHasContent = false; + for (int i = 0; i < 5; i++) + { + hashPic[i] = NULL; + } } TComHash::~TComHash() @@ -120,12 +124,21 @@ TComHash::~TComHash() m_lookupTable = NULL; } } - -void TComHash::create() +void TComHash::create(int picWidth, int picHeight) { - if (m_lookupTable != NULL) + if (m_lookupTable) { clearAll(); + } + if (!hashPic[0]) + { + for (int k = 0; k < 5; k++) + { + hashPic[k] = new uint16_t[picWidth*picHeight]; + } + } + if (m_lookupTable) + { return; } int maxAddr = 1 << (m_CRCBits + m_blockSizeBits); @@ -136,6 +149,14 @@ void TComHash::create() void TComHash::clearAll() { + if (hashPic[0]) + { + for (int k = 0; k < 5; k++) + { + delete[] hashPic[k]; + hashPic[k] = NULL; + } + } tableHasContent = false; if (m_lookupTable == NULL) { @@ -251,83 +272,6 @@ void TComHash::generateBlock2x2HashValue(const PelUnitBuf &curPicBuf, int picWid delete[] p; } -void TComHash::generateRectangleHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]) -{ - //at present, only support 1:2(2:1) retangle hash value - CHECK(width != (height << 1) && (width << 1) != height, "Wrong") - bool isHorizontal = width == (height << 1) ? 
true : false; - - int xEnd = picWidth - width + 1; - int yEnd = picHeight - height + 1; - - int srcWidth = width >> 1; - int quadWidth = width >> 2; - int srcHeight = height >> 1; - int quadHeight = height >> 2; - - int length = 2 * sizeof(uint32_t); - uint32_t* p = new uint32_t[2]; - int pos = 0; - if (isHorizontal) - { - for (int yPos = 0; yPos < yEnd; yPos++) - { - for (int xPos = 0; xPos < xEnd; xPos++) - { - p[0] = srcPicBlockHash[0][pos]; - p[1] = srcPicBlockHash[0][pos + srcWidth]; - dstPicBlockHash[0][pos] = TComHash::getCRCValue1((unsigned char*)p, length); - - p[0] = srcPicBlockHash[1][pos]; - p[1] = srcPicBlockHash[1][pos + srcWidth]; - dstPicBlockHash[1][pos] = TComHash::getCRCValue2((unsigned char*)p, length); - - dstPicBlockSameInfo[0][pos] = srcPicBlockSameInfo[0][pos] && srcPicBlockSameInfo[0][pos + quadWidth] && srcPicBlockSameInfo[0][pos + srcWidth]; - dstPicBlockSameInfo[1][pos] = srcPicBlockSameInfo[1][pos] && srcPicBlockSameInfo[1][pos + srcWidth]; - pos++; - } - pos += width - 1; - } - } - else - { - for (int yPos = 0; yPos < yEnd; yPos++) - { - for (int xPos = 0; xPos < xEnd; xPos++) - { - p[0] = srcPicBlockHash[0][pos]; - p[1] = srcPicBlockHash[0][pos + srcHeight * picWidth]; - dstPicBlockHash[0][pos] = TComHash::getCRCValue1((unsigned char*)p, length); - - p[0] = srcPicBlockHash[1][pos]; - p[1] = srcPicBlockHash[1][pos + srcHeight * picWidth]; - dstPicBlockHash[1][pos] = TComHash::getCRCValue2((unsigned char*)p, length); - - dstPicBlockSameInfo[0][pos] = srcPicBlockSameInfo[0][pos] && srcPicBlockSameInfo[0][pos + srcHeight * picWidth]; - dstPicBlockSameInfo[1][pos] = srcPicBlockSameInfo[1][pos] && srcPicBlockSameInfo[1][pos + quadHeight * picWidth] && srcPicBlockSameInfo[1][pos + srcHeight * picWidth]; - - pos++; - } - pos += width - 1; - } - } - - int widthMinus1 = width - 1; - int heightMinus1 = height - 1; - pos = 0; - - for (int yPos = 0; yPos < yEnd; yPos++) - { - for (int xPos = 0; xPos < xEnd; xPos++) - { - 
dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]) || (((xPos & widthMinus1) == 0) && ((yPos & heightMinus1) == 0)); - pos++; - } - pos += width - 1; - } - - delete[] p; -} void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]) { @@ -341,7 +285,7 @@ void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, in int length = 4 * sizeof(uint32_t); - uint32_t* p = new uint32_t[4]; + uint32_t p[4]; int pos = 0; for (int yPos = 0; yPos < yEnd; yPos++) { @@ -372,23 +316,18 @@ void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, in if (width >= 4) { - int widthMinus1 = width - 1; - int heightMinus1 = height - 1; pos = 0; for (int yPos = 0; yPos < yEnd; yPos++) { for (int xPos = 0; xPos < xEnd; xPos++) { - dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]) || (((xPos & widthMinus1) == 0) && ((yPos & heightMinus1) == 0)); + dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]); pos++; } pos += width - 1; } } - - delete[] p; - } void TComHash::addToHashMapByRowWithPrecalData(uint32_t* picHash[2], bool* picIsSame, int picWidth, int picHeight, int width, int height) @@ -404,12 +343,14 @@ void TComHash::addToHashMapByRowWithPrecalData(uint32_t* picHash[2], bool* picIs addValue <<= m_CRCBits; int crcMask = 1 << m_CRCBits; crcMask -= 1; + int blockIdx = floorLog2(width) - 2; for (int xPos = 0; xPos < xEnd; xPos++) { for (int yPos = 0; yPos < yEnd; yPos++) { int pos = yPos * picWidth + xPos; + hashPic[blockIdx][pos] = (uint16_t)(srcHash[1][pos] & crcMask); //valid data if (srcIsAdded[pos]) { @@ -557,7 +498,36 @@ bool TComHash::isBlock2x2ColSameValue(unsigned char* p, bool includeAllComponent return true; } +bool TComHash::isHorizontalPerfectLuma(const Pel* srcPel, 
int stride, int width, int height) +{ + for (int i = 0; i < height; i++) + { + for (int j = 1; j < width; j++) + { + if (srcPel[j] != srcPel[0]) + { + return false; + } + } + srcPel += stride; + } + return true; +} +bool TComHash::isVerticalPerfectLuma(const Pel* srcPel, int stride, int width, int height) +{ + for (int i = 0; i < width; i++) + { + for (int j = 1; j < height; j++) + { + if (srcPel[j*stride + i] != srcPel[i]) + { + return false; + } + } + } + return true; +} bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int height, int xStart, int yStart, const BitDepths bitDepths, uint32_t& hashValue1, uint32_t& hashValue2) { int addValue = m_blockSizeToIndex[width][height]; @@ -575,7 +545,7 @@ bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int hei } unsigned char* p = new unsigned char[length]; - uint32_t* toHash = new uint32_t[4]; + uint32_t toHash[4]; int block2x2Num = (width*height) >> 2; @@ -682,8 +652,6 @@ bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int hei hashValue1 = (hashValueBuffer[0][dstIdx][0] & crcMask) + addValue; hashValue2 = hashValueBuffer[1][dstIdx][0]; - delete[] toHash; - for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) @@ -712,8 +680,6 @@ void TComHash::initBlockSizeToIndex() m_blockSizeToIndex[32][32] = 2; m_blockSizeToIndex[64][64] = 3; m_blockSizeToIndex[4][4] = 4; - m_blockSizeToIndex[4][8] = 5; - m_blockSizeToIndex[8][4] = 6; } uint32_t TComHash::getCRCValue1(unsigned char* p, int length) diff --git a/source/Lib/CommonLib/Hash.h b/source/Lib/CommonLib/Hash.h index d69787cfc70eb38306153b2fc0e913793098fa66..2a47b0ffb60fd3c46044d73439588bbe73e35c3f 100644 --- a/source/Lib/CommonLib/Hash.h +++ b/source/Lib/CommonLib/Hash.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -91,7 +91,7 @@ struct TComHash public: TComHash(); ~TComHash(); - void create(); + void create(int picWidth, int picHeight); void clearAll(); void addToTable(uint32_t hashValue, const BlockHash& blockHash); int count(uint32_t hashValue); @@ -102,11 +102,10 @@ public: void generateBlock2x2HashValue(const PelUnitBuf &curPicBuf, int picWidth, int picHeight, const BitDepths bitDepths, uint32_t* picBlockHash[2], bool* picBlockSameInfo[3]); void generateBlockHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]); - void generateRectangleHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]); void addToHashMapByRowWithPrecalData(uint32_t* srcHash[2], bool* srcIsSame, int picWidth, int picHeight, int width, int height); bool isInitial() { return tableHasContent; } void setInitial() { tableHasContent = true; } - + uint16_t* getHashPic(int baseSize) const { return hashPic[floorLog2(baseSize) - 2]; } public: @@ -117,10 +116,13 @@ public: static bool isBlock2x2ColSameValue(unsigned char* p, bool includeAllComponent = true); static bool getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int height, int xStart, int yStart, const BitDepths bitDepths, uint32_t& hashValue1, uint32_t& hashValue2); static void initBlockSizeToIndex(); + static bool isHorizontalPerfectLuma(const Pel* srcPel, int stride, int width, int height); + static bool isVerticalPerfectLuma(const Pel* srcPel, int stride, int width, int height); private: std::vector<BlockHash>** m_lookupTable; bool tableHasContent; + uint16_t* hashPic[5];//4x4 ~ 64x64 private: static const int m_CRCBits = 16; diff --git a/source/Lib/CommonLib/IbcHashMap.cpp b/source/Lib/CommonLib/IbcHashMap.cpp index 
9d876292deeedcab45d80f3057b14018b8ba17a4..3b0b2d2f12038f79e7bdcf1166d9d0c5a5d03bce 100644 --- a/source/Lib/CommonLib/IbcHashMap.cpp +++ b/source/Lib/CommonLib/IbcHashMap.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -274,6 +274,7 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, // find the block with least candidates size_t minSize = MAX_UINT; unsigned int targetHashOneBlock = 0; + Position targetBlockOffsetInCu(0, 0); for (SizeType y = 0; y < lumaArea.height && minSize > 1; y += MIN_PU_SIZE) { for (SizeType x = 0; x < lumaArea.width && minSize > 1; x += MIN_PU_SIZE) @@ -283,6 +284,7 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, { minSize = m_hash2Pos[hash].size(); targetHashOneBlock = hash; + targetBlockOffsetInCu.repositionTo(Position(x, y)); } } } @@ -294,11 +296,12 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, // check whether whole block match for (std::vector<Position>::iterator refBlockPos = candOneBlock.begin(); refBlockPos != candOneBlock.end(); refBlockPos++) { - Position bottomRight = refBlockPos->offset(lumaArea.width - 1, lumaArea.height - 1); + Position topLeft = refBlockPos->offset(-targetBlockOffsetInCu.x, -targetBlockOffsetInCu.y); + Position bottomRight = topLeft.offset(lumaArea.width - 1, lumaArea.height - 1); bool wholeBlockMatch = true; if (lumaArea.width > MIN_PU_SIZE || lumaArea.height > MIN_PU_SIZE) { - if (!cs.isDecomp(bottomRight, cs.chType) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight) + if (!cs.isDecomp(bottomRight, CHANNEL_TYPE_LUMA) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight || topLeft.x < 0 || topLeft.y < 0) { continue; } @@ -307,20 
+310,21 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, for (SizeType x = 0; x < lumaArea.width && wholeBlockMatch; x += MIN_PU_SIZE) { // whether the reference block and current block has the same hash - wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[refBlockPos->y + y][refBlockPos->x + x]); + wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[topLeft.y + y][topLeft.x + x]); } } } else { - if (abs(refBlockPos->x - lumaArea.x) > searchRange4SmallBlk || abs(refBlockPos->y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, cs.chType)) + CHECK(topLeft != *refBlockPos, "4x4 target block should not have offset!"); + if (abs(topLeft.x - lumaArea.x) > searchRange4SmallBlk || abs(topLeft.y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, CHANNEL_TYPE_LUMA)) { continue; } } if (wholeBlockMatch) { - cand.push_back(*refBlockPos); + cand.push_back(topLeft); if (cand.size() > maxCand) { break; @@ -349,5 +353,82 @@ int IbcHashMap::getHashHitRatio(const Area& lumaArea) return 100 * hit / total; } +int IbcHashMap::calHashBlkMatchPerc(const Area& lumaArea) +{ + int maxX = std::min((int)(lumaArea.x + lumaArea.width), m_picWidth); + int maxY = std::min((int)(lumaArea.y + lumaArea.height), m_picHeight); + int maxUsage[100]; + unsigned int mostSelHash[100]; + + static int numExcludedHashValue = 36; + + for (int i = 0; i < numExcludedHashValue; i++) + { + maxUsage[i] = 0; + mostSelHash[i] = 0; + } + + for (std::unordered_map<unsigned int, std::vector<Position>>::iterator it = m_hash2Pos.begin(); it != m_hash2Pos.end(); ++it) + { + unsigned int hash = it->first; + int usage = (int)it->second.size(); + assert(usage == m_hash2Pos[hash].size()); + + int insertPos = -1; + for (insertPos = 0; insertPos < numExcludedHashValue; insertPos++) + { + if (usage > maxUsage[insertPos]) + { + break; + } + } + assert(insertPos <= numExcludedHashValue); + + 
if (insertPos < numExcludedHashValue) + { + for (int i = (numExcludedHashValue - 1); i >= (insertPos + 1); i--) + { + maxUsage[i] = maxUsage[i - 1]; + mostSelHash[i] = mostSelHash[i - 1]; + } + maxUsage[insertPos] = usage; + mostSelHash[insertPos] = hash; + } + } + + int hit = 0, total = 0; + for (int y = lumaArea.y; y < maxY; y += MIN_PU_SIZE) + { + for (int x = lumaArea.x; x < maxX; x += MIN_PU_SIZE) + { + unsigned int hash = m_pos2Hash[y][x]; + + bool excludedHash = false; + for (int i = 0; i < numExcludedHashValue && !excludedHash; i++) + { + if (hash == mostSelHash[i]) + { + excludedHash = true; + } + } + + if (excludedHash) + { + continue; + } + + hit += (m_hash2Pos[hash].size() > 1); + total++; + } + } + if (total == 0) + { + return 0; + } + else + { + return 100 * hit / total; + } +} //! \} diff --git a/source/Lib/CommonLib/IbcHashMap.h b/source/Lib/CommonLib/IbcHashMap.h index e343aab6e042abdd15ff0227a3ba38d921f24fdd..bd90e10fdb3ffa8f68a6363e75f89aa379b4c024 100644 --- a/source/Lib/CommonLib/IbcHashMap.h +++ b/source/Lib/CommonLib/IbcHashMap.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -82,6 +82,8 @@ public: bool ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk); int getHashHitRatio(const Area& lumaArea); + int calHashBlkMatchPerc(const Area& lumaArea); + #ifdef TARGET_SIMD_X86 void initIbcHashMapX86(); template <X86_VEXT vext> diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 1967564ce5c6593d9ab9dd4fb38c30270c1571a4..cad694b0a6fdc0afab936f7c074e8dc5207766a5 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,6 +57,9 @@ InterPrediction::InterPrediction() , m_maxCompIDToPred ( MAX_NUM_COMPONENT ) , m_pcRdCost ( nullptr ) , m_storedMv ( nullptr ) +, m_skipPROF (false) +, m_encOnly (false) +, m_isBi (false) , m_gradX0(nullptr) , m_gradY0(nullptr) , m_gradX1(nullptr) @@ -124,6 +127,9 @@ void InterPrediction::destroy() } m_triangleBuf.destroy(); + m_colorTransResiBuf[0].destroy(); + m_colorTransResiBuf[1].destroy(); + m_colorTransResiBuf[2].destroy(); if (m_storedMv != nullptr) { @@ -146,9 +152,10 @@ void InterPrediction::destroy() xFree(m_cRefSamplesDMVRL1[ch]); m_cRefSamplesDMVRL1[ch] = nullptr; } + m_IBCBuffer.destroy(); } -void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) +void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize ) { m_pcRdCost = pcRdCost; @@ -186,6 +193,9 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) } m_triangleBuf.create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, 
MAX_CU_SIZE))); + m_colorTransResiBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); + m_colorTransResiBuf[1].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); + m_colorTransResiBuf[2].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); m_iRefListIdx = -1; @@ -214,42 +224,11 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; m_storedMv = new Mv[MVBUFFER_SIZE*MVBUFFER_SIZE]; } -} - -bool checkIdenticalMotion( const PredictionUnit &pu, bool checkAffine ) -{ - const Slice &slice = *pu.cs->slice; - - if( slice.isInterB() && !pu.cs->pps->getWPBiPred() ) + if (m_IBCBuffer.bufs.empty()) { - if( pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 ) - { - int RefPOCL0 = slice.getRefPic( REF_PIC_LIST_0, pu.refIdx[0] )->getPOC(); - int RefPOCL1 = slice.getRefPic( REF_PIC_LIST_1, pu.refIdx[1] )->getPOC(); - - if( RefPOCL0 == RefPOCL1 ) - { - if( !pu.cu->affine ) - { - if( pu.mv[0] == pu.mv[1] ) - { - return true; - } - } - else - { - CHECK( !checkAffine, "In this case, checkAffine should be on." 
); - if ( (pu.cu->affineType == AFFINEMODEL_4PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1])) - || (pu.cu->affineType == AFFINEMODEL_6PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1]) && (pu.mvAffi[0][2] == pu.mvAffi[1][2])) ) - { - return true; - } - } - } - } + m_IBCBufferWidth = g_IBCBufferSize / ctuSize; + m_IBCBuffer.create(UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize))); } - - return false; } // ==================================================================================================================== @@ -291,7 +270,7 @@ bool InterPrediction::xCheckIdenticalMotion( const PredictionUnit &pu ) return false; } -void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ ) +void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, const bool luma /*= true*/, const bool chroma /*= true*/) { // compute the location of the current PU @@ -324,6 +303,12 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R int fstStep = (!verMC ? puHeight : puWidth); int secStep = (!verMC ? puWidth : puHeight); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + bool scaled = pu.cu->slice->getRefPic( REF_PIC_LIST_0, 0 )->isRefScaled( pu.cs->pps ) || ( pu.cs->slice->getSliceType() == B_SLICE ? pu.cu->slice->getRefPic( REF_PIC_LIST_1, 0 )->isRefScaled( pu.cs->pps ) : false ); +#else + bool scaled = pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, 0 ) != SCALE_1X || ( pu.cs->slice->getSliceType() == B_SLICE ? pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, 0 ) != SCALE_1X : false ); +#endif + m_subPuMC = true; for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) @@ -340,7 +325,7 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R while (later < secEnd) { const MotionInfo &laterMi = !verMC ? 
pu.getMotionInfo(Position{ later, fstDim }) : pu.getMotionInfo(Position{ fstDim, later }); - if (laterMi == curMi) + if (!scaled && laterMi == curMi) { length += secStep; } @@ -358,7 +343,7 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu)); subPu.mmvdEncOptMode = 0; subPu.mvRefine = false; - motionCompensation(subPu, subPredBuf, eRefPicList); + motionCompensation(subPu, subPredBuf, eRefPicList, luma, chroma); secDim = later - secStep; } } @@ -366,36 +351,99 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R pu.cu->affine = isAffine; } - -void InterPrediction::xChromaMC(PredictionUnit &pu, PelUnitBuf& pcYuvPred) +void InterPrediction::xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, PelUnitBuf* yuvDstTmp /*= NULL*/) { - // separated tree, chroma - const CompArea lumaArea = CompArea(COMPONENT_Y, pu.chromaFormat, pu.Cb().lumaPos(), recalcSize(pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, pu.Cb().size())); + // compute the location of the current PU + Position puPos = pu.lumaPos(); + Size puSize = pu.lumaSize(); + +#if JVET_J0090_MEMORY_BANDWITH_MEASURE + JVET_J0090_SET_CACHE_ENABLE(true); + int mvShift = (MV_FRACTIONAL_BITS_INTERNAL); + for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) + { + RefPicList refId = (RefPicList)k; + const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + { + Mv cMv = pu.mv[refId]; + int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int filtersize = (compID == (COMPONENT_Y)) ? 
NTAPS_LUMA : NTAPS_CHROMA; + cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp)); + bool wrapRef = false; + if (pu.cs->sps->getWrapAroundEnabledFlag()) + { + wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps); + } + else + { + clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + } + + int width = predBuf.bufs[compID].width + (filtersize - 1); + int height = predBuf.bufs[compID].height + (filtersize - 1); + + CPelBuf refBuf; + Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); + refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef); + + JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID); + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col++) + { + JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__); + } + } + } + } + JVET_J0090_SET_CACHE_ENABLE(false); +#endif PredictionUnit subPu; + subPu.cs = pu.cs; subPu.cu = pu.cu; - - Picture * refPic = pu.cu->slice->getPic(); - for (int y = lumaArea.y; y < lumaArea.y + lumaArea.height; y += MIN_PU_SIZE) + subPu.mergeType = pu.mergeType; + subPu.mmvdMergeFlag = pu.mmvdMergeFlag; + subPu.mmvdEncOptMode = pu.mmvdEncOptMode; + subPu.mergeFlag = pu.mergeFlag; + subPu.ciipFlag = pu.ciipFlag; + subPu.mvRefine = pu.mvRefine; + subPu.refIdx[0] = pu.refIdx[0]; + subPu.refIdx[1] = pu.refIdx[1]; + int fstStart = puPos.y; + int secStart = puPos.x; + int fstEnd = puPos.y + puSize.height; + int secEnd = puPos.x + puSize.width; + int fstStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.height); + int secStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.width); + for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) { - for (int x = lumaArea.x; x < lumaArea.x + lumaArea.width; x += MIN_PU_SIZE) + for (int 
secDim = secStart; secDim < secEnd; secDim += secStep) { - const MotionInfo &curMi = pu.cs->picture->cs->getMotionInfo(Position{ x, y }); + int x = secDim; + int y = fstDim; + int dx = secStep; + int dy = fstStep; - subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, MIN_PU_SIZE, MIN_PU_SIZE))); - PelUnitBuf subPredBuf = pcYuvPred.subBuf(UnitAreaRelative(pu, subPu)); + const MotionInfo &curMi = pu.getMotionInfo(Position{ x, y }); + + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); + subPu = curMi; + PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu)); - xPredInterBlk(COMPONENT_Cb, subPu, refPic, curMi.mv[0], subPredBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb) - , false - , true); - xPredInterBlk(COMPONENT_Cr, subPu, refPic, curMi.mv[0], subPredBuf, false, pu.cu->slice->clpRng(COMPONENT_Cr) - , false - , true); + if (yuvDstTmp) + { + PelUnitBuf subPredBufTmp = yuvDstTmp->subBuf(UnitAreaRelative(pu, subPu)); + motionCompensation(subPu, subPredBuf, eRefPicList, true, true, &subPredBufTmp); + } + else + motionCompensation(subPu, subPredBuf, eRefPicList); } } + JVET_J0090_SET_CACHE_ENABLE(true); } - void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi , const bool& bioApplied , const bool luma, const bool chroma @@ -406,6 +454,7 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& int iRefIdx = pu.refIdx[eRefPicList]; Mv mv[3]; bool isIBC = false; + CHECK( !CU::isIBC( *pu.cu ) && pu.lwidth() == 4 && pu.lheight() == 4, "invalid 4x4 inter blocks" ); if (CU::isIBC(*pu.cu)) { isIBC = true; @@ -422,10 +471,21 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& { mv[0] = pu.mv[eRefPicList]; } - if ( !pu.cu->affine ) - clipMv(mv[0], pu.cu->lumaPos(), - pu.cu->lumaSize(), - sps); + + if( !pu.cu->affine ) + { +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( pu.cu->slice->getRefPic( 
eRefPicList, iRefIdx )->isRefScaled( pu.cs->pps ) == false ) +#else + if( pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) == SCALE_1X ) +#endif + { + if( !sps.getWrapAroundEnabledFlag() ) + { + clipMv( mv[0], pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps ); + } + } + } for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ ) { @@ -437,7 +497,9 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& if ( pu.cu->affine ) { CHECK( bioApplied, "BIO is not allowed with affine" ); - xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ) ); + m_iRefListIdx = eRefPicList; + bool genChromaMv = (!luma && chroma && compID == COMPONENT_Cb); + xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), genChromaMv, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx )); } else { @@ -450,22 +512,26 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& } else { - xPredInterBlk(compID, pu, pu.cu->slice->getRefPic(eRefPicList, iRefIdx), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng(compID) - , bioApplied - , isIBC - ); + xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID ), bioApplied, isIBC, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) ); } } } } -void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) +void InterPrediction::xPredInterBi(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/) { const PPS &pps = *pu.cs->pps; const Slice &slice = *pu.cs->slice; + CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); + WPScalingParam *wp0; + WPScalingParam 
*wp1; + int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; + int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; + pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0); + pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1); bool bioApplied = false; - if (pu.cs->sps->getBDOFEnabledFlag()) + if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag())) { if (pu.cu->affine || m_subPuMC) { @@ -473,24 +539,29 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) } else { - const bool biocheck0 = !(pps.getWPBiPred() && slice.getSliceType() == B_SLICE); + const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE); const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE); if (biocheck0 && biocheck1 - && PU::isBiPredFromDifferentDir(pu) - && !(pu.Y().height == 4 || (pu.Y().width == 4 && pu.Y().height == 8)) + && PU::isBiPredFromDifferentDirEqDistPoc(pu) + && (pu.Y().height >= 8) + && (pu.Y().width >= 8) + && ((pu.Y().height * pu.Y().width) >= 128) ) { bioApplied = true; } } + if (bioApplied && pu.ciipFlag) + bioApplied = false; + if (bioApplied && pu.cu->smvdMode) { bioApplied = false; } - if (pu.cu->cs->sps->getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT) + if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT) { bioApplied = false; } @@ -500,6 +571,18 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) } bool dmvrApplied = false; dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + bool refIsScaled = ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || + ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) ); + dmvrApplied = dmvrApplied && !refIsScaled; + bioApplied = bioApplied && !refIsScaled; +#else + bool samePicSize = ( refIdx0 < 0 ? 
true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X ); + dmvrApplied = dmvrApplied && samePicSize; + bioApplied = bioApplied && samePicSize; +#endif + for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { if( pu.refIdx[refList] < 0) @@ -521,10 +604,14 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) { if (dmvrApplied) - continue; // mc will happen in processDMVR + { + if (yuvPredTmp) + xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma); + continue; + } xPredInterUni ( pu, eRefPicList, pcMbBuf, true , bioApplied - , true, true + , luma, chroma ); } else @@ -533,43 +620,51 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) { xPredInterUni ( pu, eRefPicList, pcMbBuf, true , bioApplied - , true, true + , luma, chroma ); } else { xPredInterUni( pu, eRefPicList, pcMbBuf, pu.cu->triangle , bioApplied - , true, true + , luma, chroma ); } } } - if (dmvrApplied) - { - xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied); - } - - CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ? CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr())) ); CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ? 
CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) : CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) ); - if( pps.getWPBiPred() && slice.getSliceType() == B_SLICE ) + const bool lumaOnly = luma && !chroma; + const bool chromaOnly = !luma && chroma; + if( !pu.cu->triangle && (!dmvrApplied) && (!bioApplied) && pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->BcwIdx==BCW_DEFAULT) { - xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred ); + xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred, lumaOnly, chromaOnly ); + if (yuvPredTmp) + yuvPredTmp->copyFrom(pcYuvPred); } - else if( pps.getUseWP() && slice.getSliceType() == P_SLICE ) + else if( !pu.cu->triangle && pps.getUseWP() && slice.getSliceType() == P_SLICE ) { - xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred ); + xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred, lumaOnly, chromaOnly ); + if (yuvPredTmp) + yuvPredTmp->copyFrom(pcYuvPred); } else { - if (dmvrApplied == false) + if (dmvrApplied) + { + if (yuvPredTmp) + { + yuvPredTmp->addAvg(srcPred0, srcPred1, slice.clpRngs(), false); + } + xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied); + } + else { - xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied ); + xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp ); } } } @@ -577,6 +672,7 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred) void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied , bool isIBC + , 
const std::pair<int, int> scalingRatio , SizeType dmvrWidth , SizeType dmvrHeight , bool bilinearMC @@ -591,8 +687,24 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX(compID, chFmt); int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY(compID, chFmt); - int xFrac = _mv.hor & ((1 << shiftHor) - 1); - int yFrac = _mv.ver & ((1 << shiftVer) - 1); + bool wrapRef = false; + Mv mv(_mv); + if( !isIBC && pu.cs->sps->getWrapAroundEnabledFlag() ) + { + wrapRef = wrapClipMv( mv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps ); + } + + bool useAltHpelIf = pu.cu->imv == IMV_HPEL; + + if( !isIBC && xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID], Size( dstPic.bufs[compID].width, dstPic.bufs[compID].height ) ), refPic, mv, dstPic.bufs[compID].buf, dstPic.bufs[compID].stride, bi, wrapRef, clpRng, 0, useAltHpelIf ) ) + { + CHECK( bilinearMC, "DMVR should be disabled with RPR" ); + CHECK( bioApplied, "BDOF should be disabled with RPR" ); + } + else + { + int xFrac = mv.hor & ((1 << shiftHor) - 1); + int yFrac = mv.ver & ((1 << shiftVer) - 1); if (isIBC) { xFrac = yFrac = 0; @@ -605,13 +717,13 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio CPelBuf refBuf; { - Position offset = pu.blocks[compID].pos().offset( _mv.getHor() >> shiftHor, _mv.getVer() >> shiftVer ); + Position offset = pu.blocks[compID].pos().offset( mv.getHor() >> shiftHor, mv.getVer() >> shiftVer ); if (dmvrWidth) { - refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight))); + refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight)), wrapRef); } else - refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) ); + refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ), wrapRef); } if 
(NULL != srcPadBuf) @@ -640,13 +752,14 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2; } + if( yFrac == 0 ) { - m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); + m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); } else if( xFrac == 0 ) { - m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); + m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); } else { @@ -659,15 +772,17 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio { vFilterSize = NTAPS_BILINEAR; } - m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC); + m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); JVET_J0090_SET_CACHE_ENABLE( false ); - m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC); + m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, 
false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf); } - JVET_J0090_SET_CACHE_ENABLE( true ); + JVET_J0090_SET_CACHE_ENABLE((srcPadStride == 0) && (bioApplied == false)); // Enabled only in non-DMVR-non-BDOF process, In DMVR process, srcPadStride is always non-zero if (bioApplied && compID == COMPONENT_Y) { const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); - const Pel* refPel = refBuf.buf - refBuf.stride - 1; + int xOffset = (xFrac < 8) ? 1 : 0; + int yOffset = (yFrac < 8) ? 1 : 0; + const Pel* refPel = refBuf.buf - yOffset * refBuf.stride - xOffset; Pel* dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1; for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++) { @@ -675,7 +790,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio dstPel[w] = val - (Pel)IF_INTERNAL_OFFS; } - refPel = refBuf.buf - 1; + refPel = refBuf.buf + (1 - yOffset)*refBuf.stride - xOffset; dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 1; for (int h = 0; h < (height - 2 * BIO_EXTEND_SIZE - 2); h++) { @@ -689,7 +804,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio dstPel += dstBuf.stride; } - refPel = refBuf.buf + (height - 2 * BIO_EXTEND_SIZE - 2)*refBuf.stride - 1; + refPel = refBuf.buf + (height - 2 * BIO_EXTEND_SIZE - 2 + 1 - yOffset)*refBuf.stride - xOffset; dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + (height - 2 * BIO_EXTEND_SIZE)*dstBuf.stride + 1; for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++) { @@ -703,24 +818,51 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio dstBuf.buf = backupDstBufPtr; dstBuf.stride = backupDstBufStride; } + } } -void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng ) +bool InterPrediction::isSubblockVectorSpreadOverLimit( 
int a, int b, int c, int d, int predType ) { - if ( (pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) - || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1]) - ) + int s4 = ( 4 << 11 ); + int filterTap = 6; + + if ( predType == 3 ) + { + int refBlkWidth = std::max( std::max( 0, 4 * a + s4 ), std::max( 4 * c, 4 * a + 4 * c + s4 ) ) - std::min( std::min( 0, 4 * a + s4 ), std::min( 4 * c, 4 * a + 4 * c + s4 ) ); + int refBlkHeight = std::max( std::max( 0, 4 * b ), std::max( 4 * d + s4, 4 * b + 4 * d + s4 ) ) - std::min( std::min( 0, 4 * b ), std::min( 4 * d + s4, 4 * b + 4 * d + s4 ) ); + refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; + refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; + + if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 9 ) ) + { + return true; + } + } + else { - Mv mvTemp = _mv[0]; - clipMv( mvTemp, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); - xPredInterBlk( compID, pu, refPic, mvTemp, dstPic, bi, clpRng - , false - , false - ); - return; + int refBlkWidth = std::max( 0, 4 * a + s4 ) - std::min( 0, 4 * a + s4 ); + int refBlkHeight = std::max( 0, 4 * b ) - std::min( 0, 4 * b ); + refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; + refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; + if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 5 ) ) + { + return true; + } + + refBlkWidth = std::max( 0, 4 * c ) - std::min( 0, 4 * c ); + refBlkHeight = std::max( 0, 4 * d + s4 ) - std::min( 0, 4 * d + s4 ); + refBlkWidth = ( refBlkWidth >> 11 ) + filterTap + 3; + refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; + if ( refBlkWidth * refBlkHeight > ( filterTap + 5 ) * ( filterTap + 9 ) ) + { + return true; + } } + return false; +} + +void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const 
std::pair<int, int> scalingRatio) +{ JVET_J0090_SET_REF_PICTURE( refPic, compID ); const ChromaFormat chFmt = pu.chromaFormat; @@ -739,7 +881,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio int blockHeight = AFFINE_MIN_BLOCK_SIZE; CHECK(blockWidth > (width >> iScaleX ), "Sub Block width > Block width"); - CHECK(blockHeight > (height >> iScaleX), "Sub Block height > Block height"); + CHECK(blockHeight > (height >> iScaleY), "Sub Block height > Block height"); const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; const int cxWidth = width >> iScaleX; @@ -749,12 +891,12 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio const int iBit = MAX_CU_DEPTH; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; - iDMvHorX = (mvRT - mvLT).getHor() << (iBit - g_aucLog2[cxWidth]); - iDMvHorY = (mvRT - mvLT).getVer() << (iBit - g_aucLog2[cxWidth]); + iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidth)); + iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidth)); if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - iDMvVerX = (mvLB - mvLT).getHor() << (iBit - g_aucLog2[cxHeight]); - iDMvVerY = (mvLB - mvLT).getVer() << (iBit - g_aucLog2[cxHeight]); + iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeight)); + iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeight)); } else { @@ -767,16 +909,161 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio const SPS &sps = *pu.cs->sps; const int iMvShift = 4; const int iOffset = 8; - const int iHorMax = ( sps.getPicWidthInLumaSamples() + iOffset - pu.Y().x - 1 ) << iMvShift; + const int iHorMax = ( pu.cs->pps->getPicWidthInLumaSamples() + iOffset - pu.Y().x - 1 ) << iMvShift; const int iHorMin = ( -(int)pu.cs->pcv->maxCUWidth - iOffset - (int)pu.Y().x + 1 ) << iMvShift; - const int iVerMax = ( sps.getPicHeightInLumaSamples() + iOffset - pu.Y().y - 1 ) << iMvShift; + const int iVerMax = ( 
pu.cs->pps->getPicHeightInLumaSamples() + iOffset - pu.Y().y - 1 ) << iMvShift; const int iVerMin = ( -(int)pu.cs->pcv->maxCUHeight - iOffset - (int)pu.Y().y + 1 ) << iMvShift; - PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]); const int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; + bool wrapRef = false; + const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit( iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir ); + + bool enablePROF = (sps.getUsePROF()) && (!m_skipPROF) && (compID == COMPONENT_Y); + enablePROF &= (!pu.cs->picHeader->getDisProfFlag()); + enablePROF &= !((pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1])); + enablePROF &= !subblkMVSpreadOverLimit; + const int profThres = 1 << (iBit + (m_isBi ? 1 : 0)); + enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres; +#if JVET_Q0487_SCALING_WINDOW_ISSUES + enablePROF &= (refPic->isRefScaled( pu.cs->pps ) == false); +#else + enablePROF &= pu.cs->pps->getPicWidthInLumaSamples() == refPic->getPicWidthInLumaSamples() && pu.cs->pps->getPicHeightInLumaSamples() == refPic->getPicHeightInLumaSamples(); + enablePROF &= scalingRatio == SCALE_1X; +#endif + + + bool isLast = enablePROF ? 
false : !bi; + + const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2; + const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2; + + PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH); + PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH); + const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA); + const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3; + const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2; + PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH); + + const int refExtH = dstExtH + MAX_FILTER_SIZE - 1; + PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH); + + PelBuf &dstBuf = dstPic.bufs[compID]; + + int *dMvScaleHor = m_dMvBuf[m_iRefListIdx]; + int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16; + + if (enablePROF) + { + int* dMvH = dMvScaleHor; + int* dMvV = dMvScaleVer; + int quadHorX = iDMvHorX << 2; + int quadHorY = iDMvHorY << 2; + int quadVerX = iDMvVerX << 2; + int quadVerY = iDMvVerY << 2; + + dMvH[0] = ((iDMvHorX + iDMvVerX) << 1) - ((quadHorX + quadVerX) << 1); + dMvV[0] = ((iDMvHorY + iDMvVerY) << 1) - ((quadHorY + quadVerY) << 1); + + for (int w = 1; w < blockWidth; w++) + { + dMvH[w] = dMvH[w - 1] + quadHorX; + dMvV[w] = dMvV[w - 1] + quadHorY; + } + + dMvH += blockWidth; + dMvV += blockWidth; + for (int h = 1; h < blockHeight; h++) + { + for (int w = 0; w < blockWidth; w++) + { + dMvH[w] = dMvH[w - blockWidth] + quadVerX; + dMvV[w] = dMvV[w - blockWidth] + quadVerY; + } + dMvH += blockWidth; + dMvV += blockWidth; + } + + const int mvShift = 8; + const int dmvLimit = ( 1 << 5 ) - 1; + + if (!g_pelBufOP.roundIntVector) + { + for (int idx = 0; idx < blockWidth * blockHeight; idx++) + { + roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); + dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] ); + dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] ); + } + } + else + { + int sz = blockWidth * blockHeight; + 
g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); + g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); + } + } + int scaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt); + int scaleYLuma = ::getComponentScaleY(COMPONENT_Y, chFmt); + + if (genChromaMv && pu.chromaFormat != CHROMA_444) + { + CHECK(compID == COMPONENT_Y, "Chroma only subblock MV calculation should not apply to Luma"); + int lumaBlockWidth = AFFINE_MIN_BLOCK_SIZE; + int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE; + + CHECK(lumaBlockWidth > (width >> scaleXLuma), "Sub Block width > Block width"); + CHECK(lumaBlockHeight > (height >> scaleYLuma), "Sub Block height > Block height"); + + const int cxWidthLuma = width >> scaleXLuma; + const int cxHeightLuma = height >> scaleYLuma; + const int halfBWLuma = lumaBlockWidth >> 1; + const int halfBHLuma = lumaBlockHeight >> 1; + + int dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma; + dMvHorXLuma = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidthLuma)); + dMvHorYLuma = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidthLuma)); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + dMvVerXLuma = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeightLuma)); + dMvVerYLuma = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeightLuma)); + } + else + { + dMvVerXLuma = -dMvHorYLuma; + dMvVerYLuma = dMvHorXLuma; + } + + const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma, pu.interDir); + + // get luma MV block by block + for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight) + { + for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth) + { + int mvScaleTmpHor, mvScaleTmpVer; + if (!subblkMVSpreadOverLimitLuma) + { + mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (halfBWLuma + w) + dMvVerXLuma * (halfBHLuma + h); + mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (halfBWLuma + w) + dMvVerYLuma * (halfBHLuma + h); + } + else + { + mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (cxWidthLuma 
>> 1) + dMvVerXLuma * (cxHeightLuma >> 1); + mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (cxWidthLuma >> 1) + dMvVerYLuma * (cxHeightLuma >> 1); + } + roundAffineMv(mvScaleTmpHor, mvScaleTmpVer, shift); + Mv tmpMv(mvScaleTmpHor, mvScaleTmpVer); + tmpMv.clipToStorageBitDepth(); + mvScaleTmpHor = tmpMv.getHor(); + mvScaleTmpVer = tmpMv.getVer(); + + m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(mvScaleTmpHor, mvScaleTmpVer); + } + } + } // get prediction block by block for ( int h = 0; h < cxHeight; h += blockHeight ) { @@ -784,10 +1071,18 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio { int iMvScaleTmpHor, iMvScaleTmpVer; - if(compID == COMPONENT_Y) + if (compID == COMPONENT_Y || pu.chromaFormat == CHROMA_444) { - iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h); - iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h); + if ( !subblkMVSpreadOverLimit ) + { + iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h); + iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h); + } + else + { + iMvScaleTmpHor = iMvScaleHor + iDMvHorX * ( cxWidth >> 1 ) + iDMvVerX * ( cxHeight >> 1 ); + iMvScaleTmpVer = iMvScaleVer + iDMvHorY * ( cxWidth >> 1 ) + iDMvVerY * ( cxHeight >> 1 ); + } roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); tmpMv.clipToStorageBitDepth(); @@ -799,34 +1094,57 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio { m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); - clipMv(tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), sps); + wrapRef = wrapClipMv( tmpMv, Position( pu.Y().x + w, pu.Y().y + h ), Size( blockWidth, blockHeight ), &sps, pu.cs->pps ); 
iMvScaleTmpHor = tmpMv.getHor(); iMvScaleTmpVer = tmpMv.getVer(); } else { + wrapRef = false; m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); - iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor)); - iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer)); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( refPic->isRefScaled( pu.cs->pps ) == false ) +#else + if( scalingRatio == SCALE_1X ) +#endif + { + iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor)); + iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer)); + } } } else { Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] + - m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + 1)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + 1)]; + m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)]; roundAffineMv(curMv.hor, curMv.ver, 1); if (sps.getWrapAroundEnabledFlag()) { - clipMv(curMv, Position(pu.Y().x + (w << iScaleX), pu.Y().y + (h << iScaleY)), Size(blockWidth << iScaleX, blockHeight << iScaleY), sps); + wrapRef = wrapClipMv( curMv, Position( pu.Y().x + ( w << iScaleX ), pu.Y().y + ( h << iScaleY ) ), Size( blockWidth << iScaleX, blockHeight << iScaleY ), &sps, pu.cs->pps ); } else { - curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor)); - curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver)); + wrapRef = false; +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( refPic->isRefScaled( pu.cs->pps ) == false ) +#else + if( scalingRatio == SCALE_1X ) +#endif + { + curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor)); + curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver)); + } } iMvScaleTmpHor = curMv.hor; iMvScaleTmpVer = curMv.ver; } 
+ + if( xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID].offset( w, h ), Size( blockWidth, blockHeight ) ), refPic, Mv( iMvScaleTmpHor, iMvScaleTmpVer ), dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2 ) ) + { + CHECK( enablePROF, "PROF should be disabled with RPR" ); + } + else + { // get the MV in high precision int xFrac, yFrac, xInt, yInt; @@ -851,43 +1169,80 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio yFrac = iMvScaleTmpVer & 31; } - const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ) ); - PelBuf &dstBuf = dstPic.bufs[compID]; + const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ), wrapRef ); + + Pel* ref = (Pel*) refBuf.buf; + Pel* dst = dstBuf.buf + w + h * dstBuf.stride; + + int refStride = refBuf.stride; + int dstStride = dstBuf.stride; + + int bw = blockWidth; + int bh = blockHeight; + + if (enablePROF) + { + dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); + dstStride = dstExtBuf.stride; + } if ( yFrac == 0 ) { - m_if.filterHor( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, xFrac, !bi, chFmt, clpRng ); + m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng); } else if ( xFrac == 0 ) { - m_if.filterVer( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, true, !bi, chFmt, clpRng ); + m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng); } else { - m_if.filterHor( compID, (Pel*) refBuf.buf - ((vFilterSize>>1) -1)*refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, blockWidth, blockHeight+vFilterSize-1, xFrac, false, chFmt, clpRng); + 
m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false, chFmt, clpRng); JVET_J0090_SET_CACHE_ENABLE( false ); - m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, false, !bi, chFmt, clpRng); + m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng); JVET_J0090_SET_CACHE_ENABLE( true ); } - } - } -} + if (enablePROF) + { + const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); + const int xOffset = xFrac >> 3; + const int yOffset = yFrac >> 3; -int getMSB( unsigned x ) -{ - int msb = 0, bits = ( sizeof(int) << 3 ), y = 1; - while( x > 1u ) - { - bits >>= 1; - y = x >> bits; - if( y ) - { - x = y; - msb += bits; + const int refOffset = (blockHeight + 1) * refStride; + const int dstOffset = (blockHeight + 1)* dstStride; + + const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1; + Pel* dstPel = dst - dstStride - 1; + for (int pw = 0; pw < blockWidth + 2; pw++) + { + dstPel[pw] = leftShift_round(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS; + dstPel[pw+dstOffset] = leftShift_round(refPel[pw+refOffset], shift) - (Pel)IF_INTERNAL_OFFS; + } + + refPel = ref + yOffset * refBuf.stride + xOffset; + dstPel = dst; + for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride) + { + dstPel[-1] = leftShift_round(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS; + dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS; + } + + PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); + PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); + g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, 
clpRng.bd); + + const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); + const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS; + Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); + Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); + Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); + + Pel * dstY = dstBuf.bufAt(w, h); + + g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng); + } + } } } - msb += y; - return msb; } void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths) @@ -938,15 +1293,7 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)]; const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth; const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - const int limit = (bitDepth>12)? 
2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5)); - - int* dotProductTemp1 = m_dotProduct1; - int* dotProductTemp2 = m_dotProduct2; - int* dotProductTemp3 = m_dotProduct3; - int* dotProductTemp5 = m_dotProduct5; - int* dotProductTemp6 = m_dotProduct6; - - xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth); + const int limit = ( 1 << 4 ) - 1; int xUnit = (width >> 2); int yUnit = (height >> 2); @@ -959,42 +1306,27 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf { for (int xu = 0; xu < xUnit; xu++) { - if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres) - { - srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2); - srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2); - dstY0 = dstY + ((yu*dstStride + xu) << 2); - PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4)); - dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng); - continue; - } - - int sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0; - int tmpx = 0, tmpy = 0; - - dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2); - dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2); - dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2); - dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2); - dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2); - - xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2)); - - if (sGx2 > 0) - { - tmpx = rightShiftMSB(sGxdI << 3, sGx2); - tmpx = Clip3(-limit, limit, tmpx); - } - if (sGy2 > 0) - { - int mainsGxGy = sGxGy >> 12; - int secsGxGy = sGxGy & ((1 << 12) - 1); - int tmpData = 
tmpx * mainsGxGy; - tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1; - tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2); - tmpy = Clip3(-limit, limit, tmpy); - } - + int tmpx = 0, tmpy = 0; + int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0; + int sumSignGY_GX = 0; + + Pel* pGradX0Tmp = m_gradX0 + (xu << 2) + (yu << 2) * widthG; + Pel* pGradX1Tmp = m_gradX1 + (xu << 2) + (yu << 2) * widthG; + Pel* pGradY0Tmp = m_gradY0 + (xu << 2) + (yu << 2) * widthG; + Pel* pGradY1Tmp = m_gradY1 + (xu << 2) + (yu << 2) * widthG; + const Pel* SrcY1Tmp = srcY1 + (xu << 2) + (yu << 2) * src1Stride; + const Pel* SrcY0Tmp = srcY0 + (xu << 2) + (yu << 2) * src0Stride; + + g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX); + tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX)); + tmpx = Clip3(-limit, limit, tmpx); + + int mainsGxGy = sumSignGY_GX >> 12; + int secsGxGy = sumSignGY_GX & ((1 << 12) - 1); + int tmpData = tmpx * mainsGxGy; + tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1; + tmpy = (sumAbsGY == 0 ? 
0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY)); + tmpy = Clip3(-limit, limit, tmpy); srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2); srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2); gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2); @@ -1009,39 +1341,6 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf } -bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths) -{ - const int width = pu.lwidth(); - const int height = pu.lheight(); - const int clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)]; - const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd); - const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); - const int xUnit = (width >> 2); - const int yUnit = (height >> 2); - - m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5)); - m_bioSubBlkDistThres = (shift <= 5) ? 
(((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5)); - - m_bioDistThres >>= distortionShift; - m_bioSubBlkDistThres >>= distortionShift; - - DistParam cDistParam; - Distortion dist = 0; - for (int yu = 0, blkIdx = 0; yu < yUnit; yu++) - { - for (int xu = 0; xu < xUnit; xu++, blkIdx++) - { - const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2); - const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2); - - m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true); - m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam); - dist += m_bioPredSubBlkDist[blkIdx]; - } - } - - return (dist >= m_bioDistThres); -} void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng) { @@ -1063,17 +1362,21 @@ void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize); } -void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied ) +void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/) { + CHECK( (chromaOnly && lumaOnly), "should not happen" ); + const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; if( iRefIdx0 >= 0 && iRefIdx1 >= 0 ) 
{ - if( pu.cu->GBiIdx != GBI_DEFAULT ) + if( pu.cu->BcwIdx != BCW_DEFAULT && (yuvDstTmp || !pu.ciipFlag) ) { - CHECK(bioApplied, "GBi is disallowed with BIO"); - pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx); + CHECK(bioApplied, "Bcw is disallowed with BIO"); + pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, chromaOnly, lumaOnly); + if (yuvDstTmp) + yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); return; } if (bioApplied) @@ -1083,26 +1386,68 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2; const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2; - bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths); + bool bioEnabled = true; if (bioEnabled) { applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths); + if (yuvDstTmp) + yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]); } else { pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]); + if (yuvDstTmp) + yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]); } } - pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied); - } - else if( iRefIdx0 >= 0 && iRefIdx1 < 0 ) - { + if (pu.cs->pps->getWPBiPred()) + { + const int iRefIdx0 = pu.refIdx[0]; + const int iRefIdx1 = pu.refIdx[1]; + WPScalingParam *pwp0; + WPScalingParam *pwp1; + getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1); + if (!bioApplied) + { + if (!chromaOnly) + addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y); + } + if (!lumaOnly) + { + addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb); + 
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr); + } + } + else + { + if (!bioApplied && (lumaOnly || chromaOnly)) + { + pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly); + } + else + pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied); + } + if (yuvDstTmp) + { + if (bioApplied) + { + yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]); + yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]); + } + else + yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); + } + } + else if( iRefIdx0 >= 0 && iRefIdx1 < 0 ) + { if( pu.cu->triangle ) { pcYuvDst.copyFrom( pcYuvSrc0 ); } else - pcYuvDst.copyClip( pcYuvSrc0, clpRngs ); + pcYuvDst.copyClip( pcYuvSrc0, clpRngs, lumaOnly, chromaOnly ); + if (yuvDstTmp) + yuvDstTmp->copyFrom( pcYuvDst, lumaOnly, chromaOnly ); } else if( iRefIdx0 < 0 && iRefIdx1 >= 0 ) { @@ -1111,29 +1456,41 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB pcYuvDst.copyFrom( pcYuvSrc1 ); } else - pcYuvDst.copyClip( pcYuvSrc1, clpRngs ); + pcYuvDst.copyClip( pcYuvSrc1, clpRngs, lumaOnly, chromaOnly ); + if (yuvDstTmp) + yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly); } } + void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList , const bool luma, const bool chroma + , PelUnitBuf* predBufWOBIO /*= NULL*/ ) { - // dual tree handling for IBC as the only ref - if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0) + CHECK(predBufWOBIO && pu.ciipFlag, "the case should not happen!"); + + if (!pu.cs->pcv->isEncoder) { - if (!luma && chroma) + if (CU::isIBC(*pu.cu)) { - xChromaMC(pu, predBuf); + CHECK(!luma, "IBC only for Chroma is not allowed."); + xIntraBlockCopy(pu, predBuf, COMPONENT_Y); + if (chroma) + { + xIntraBlockCopy(pu, predBuf, COMPONENT_Cb); + xIntraBlockCopy(pu, predBuf, COMPONENT_Cr); + } return; } - else // (luma && !chroma) - { + } + // dual tree handling for IBC as the only ref 
+ if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0) + { xPredInterUni(pu, eRefPicList, predBuf, false , false , luma, chroma); return; - } } // else, go with regular MC below CodingStructure &cs = *pu.cs; @@ -1142,38 +1499,110 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu if( eRefPicList != REF_PIC_LIST_X ) { - if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) ) + CHECK(predBufWOBIO != NULL, "the case should not happen!"); + if ((CU::isIBC(*pu.cu) == false) && ((sliceType == P_SLICE && pps.getUseWP()) || (sliceType == B_SLICE && pps.getWPBiPred()))) { xPredInterUni ( pu, eRefPicList, predBuf, true , false - , true, true + , luma, chroma + ); + xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred + , (luma && !chroma), (!luma && chroma) ); - xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred ); } else { xPredInterUni( pu, eRefPicList, predBuf, false , false - , true, true + , luma, chroma ); } } else { + + CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); + WPScalingParam *wp0; + WPScalingParam *wp1; + int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; + int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; + pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0); + pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1); + bool bioApplied = false; + const Slice &slice = *pu.cs->slice; + if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag())) + { + + if (pu.cu->affine || m_subPuMC) + { + bioApplied = false; + } + else + { + const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE); + const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE); + if (biocheck0 + && biocheck1 + && PU::isBiPredFromDifferentDirEqDistPoc(pu) + && 
(pu.Y().height >= 8) + && (pu.Y().width >= 8) + && ((pu.Y().height * pu.Y().width) >= 128) + ) + { + bioApplied = true; + } + } + + if (bioApplied && pu.ciipFlag) + { + bioApplied = false; + } + + if (bioApplied && pu.cu->smvdMode) + { + bioApplied = false; + } + if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT) + { + bioApplied = false; + } + if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) + { + bioApplied = false; + } + } + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + bool refIsScaled = ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || + ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) ); + bioApplied = refIsScaled ? false : bioApplied; +#else + bioApplied = ( ( refIdx0 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X ) ) ? bioApplied : false; +#endif + bool dmvrApplied = false; + dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu); + if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied)) + { + xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO); + } + else if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC) { - xSubPuMC( pu, predBuf, eRefPicList ); + CHECK(predBufWOBIO != NULL, "the case should not happen!"); + xSubPuMC( pu, predBuf, eRefPicList, luma, chroma ); } else if( xCheckIdenticalMotion( pu ) ) { xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false , false - , true, true + , luma, chroma ); + if (predBufWOBIO) + predBufWOBIO->copyFrom(predBuf, (luma && !chroma), (chroma && !luma)); } else { - xPredInterBi( pu, predBuf ); + xPredInterBi(pu, predBuf, luma, chroma, predBufWOBIO); } } return; @@ -1206,20 +1635,7 @@ void 
InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList & int InterPrediction::rightShiftMSB(int numer, int denom) { - int d; - int msbIdx = 0; - for (msbIdx = 0; msbIdx<32; msbIdx++) - { - if (denom < ((int)1 << msbIdx)) - { - break; - } - } - - int shiftIdx = msbIdx - 1; - d = (numer >> shiftIdx); - - return d; + return numer >> floorLog2(denom); } void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 ) @@ -1259,125 +1675,83 @@ void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitD { if( channel == CHANNEL_TYPE_LUMA ) { - xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); } else if( channel == CHANNEL_TYPE_CHROMA ) { - xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); - xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); } else { - xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 ); - xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); - xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, 
predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 ); + m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 ); } } -void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ) -{ - Pel* dst = predDst .get(compIdx).buf; - Pel* src0 = predSrc0.get(compIdx).buf; - Pel* src1 = predSrc1.get(compIdx).buf; - int32_t strideDst = predDst .get(compIdx).stride - width; - int32_t strideSrc0 = predSrc0.get(compIdx).stride - width; - int32_t strideSrc1 = predSrc1.get(compIdx).stride - width; - - const char log2WeightBase = 3; - const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx]; - const int32_t clipbd = clipRng.bd; - const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); - const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS; - const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; - const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); - - const int32_t ratioWH = (width > height) ? (width / height) : 1; - const int32_t ratioHW = (width > height) ? 1 : (height / width); - const bool longWeight = (compIdx == COMPONENT_Y) || ( predDst.chromaFormat == CHROMA_444 ); - const int32_t weightedLength = longWeight ? 7 : 3; - int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH ); - int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1; - int32_t weightedPosoffset =( splitDir == 0 ) ? 
ratioWH : -ratioWH; - - Pel tmpPelWeighted; - int32_t weightIdx; - int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd; - - for( y = 0; y < height; y+= ratioHW ) - { - for( tmpY = ratioHW; tmpY > 0; tmpY-- ) - { - for( x = 0; x < weightedStartPos; x++ ) - { - *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng ); - src0++; - src1++; - } - - tmpWeightedStart = std::max((int32_t)0, weightedStartPos); - tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1)); - weightIdx = 1; - if( weightedStartPos < 0 ) - { - weightIdx += abs(weightedStartPos) / ratioWH; - } - for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH ) - { - for( tmpX = ratioWH; tmpX > 0; tmpX-- ) - { - tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2)); - tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted; - *dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng ); - } - weightIdx ++; - } - - for( x = weightedEndPos + 1; x < width; x++ ) - { - *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng ); - src0++; - src1++; - } - - dst += strideDst; - src0 += strideSrc0; - src1 += strideSrc1; - } - weightedStartPos += weightedPosoffset; - weightedEndPos += weightedPosoffset; - } -} -void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId) +void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma) { int offset, width, height; - int padsize; Mv cMv; - const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic; int mvShift = (MV_FRACTIONAL_BITS_INTERNAL); - for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + + int start = 0; + int end = MAX_NUM_COMPONENT; + + start = forLuma ? 
0 : 1; + end = forLuma ? 1 : MAX_NUM_COMPONENT; + + for (int compID = start; compID < end; compID++) { cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer()); - pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA); + pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA); int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA; width = pcPad.bufs[compID].width; height = pcPad.bufs[compID].height; offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); - padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat); - int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); width += (filtersize - 1); height += (filtersize - 1); - cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), - -(((filtersize >> 1) - 1) << mvshiftTemp)); - clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps); + cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTempHor), + -(((filtersize >> 1) - 1) << mvshiftTempVer)); + bool wrapRef = false; + if( pu.cs->sps->getWrapAroundEnabledFlag() ) + { + wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps ); + } + else + { + clipMv( cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); + } /* Pre-fetch similar to HEVC*/ { CPelBuf refBuf; - Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); - refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size())); + Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTempHor, cMv.getVer() >> mvshiftTempVer); + refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, 
pu.blocks[compID].size()), wrapRef); PelBuf &dstBuf = pcPad.bufs[compID]; g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height); } + } +} +void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId) +{ + int offset = 0, width, height; + int padsize; + Mv cMv; + for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + { + int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA; + width = pcPad.bufs[compID].width; + height = pcPad.bufs[compID].height; + offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); + /*using the larger padsize for 422*/ + padsize = (DMVR_NUM_ITERATION) >> getComponentScaleY((ComponentID)compID, pu.chromaFormat); + width += (filtersize - 1); + height += (filtersize - 1); /*padding on all side of size DMVR_PAD_LENGTH*/ { g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize); @@ -1500,6 +1874,7 @@ void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& m void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied , const Mv mergeMV[NUM_REF_PIC_LIST_01] + , bool blockMoved ) { int offset, deltaIntMvX, deltaIntMvY; @@ -1514,9 +1889,12 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu RefPicList refId = (RefPicList)k; Mv cMv = pu.mv[refId]; m_iRefListIdx = refId; - const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic; Mv cMvClipped = cMv; - clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + if( !pu.cs->sps->getWrapAroundEnabledFlag() ) + { + clipMv( cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); + } Mv startMv = mergeMV[refId]; @@ -1529,30 +1907,39 @@ void 
InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu } for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) { - int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); - int leftPixelExtra; - if (compID == COMPONENT_Y) + Pel *srcBufPelPtr = NULL; + int pcPadstride = 0; + if (blockMoved || (compID == 0)) { - leftPixelExtra = (NTAPS_LUMA >> 1) - 1; - } - else - { - leftPixelExtra = (NTAPS_CHROMA >> 1) - 1; + pcPadstride = pcPadTemp.bufs[compID].stride; + int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat); + int leftPixelExtra; + if (compID == COMPONENT_Y) + { + leftPixelExtra = (NTAPS_LUMA >> 1) - 1; + } + else + { + leftPixelExtra = (NTAPS_CHROMA >> 1) - 1; + } + PelBuf &srcBuf = pcPadTemp.bufs[compID]; + deltaIntMvX = (cMv.getHor() >> mvshiftTempHor) - + (startMv.getHor() >> mvshiftTempHor); + deltaIntMvY = (cMv.getVer() >> mvshiftTempVer) - + (startMv.getVer() >> mvshiftTempVer); + + CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); + + offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1); + offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride; + offset += (deltaIntMvX); + srcBufPelPtr = (srcBuf.buf + offset); } - - deltaIntMvX = (cMv.getHor() >> mvshiftTemp) - - (startMv.getHor() >> mvshiftTemp); - deltaIntMvY = (cMv.getVer() >> mvshiftTemp) - - (startMv.getVer() >> mvshiftTemp); - - CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); - - offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1); - offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride; - offset += (deltaIntMvX); - PelBuf &srcBuf = pcPadTemp.bufs[compID]; - xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, 
pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID], - bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride); + JVET_J0090_SET_CACHE_ENABLE(false); + xPredInterBlk( (ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID], + bioApplied, false, pu.cu->slice->getScalingRatio( refId, pu.refIdx[refId] ), 0, 0, 0, srcBufPelPtr, pcPadstride ); + JVET_J0090_SET_CACHE_ENABLE(false); } pcYUVTemp = pcYuvSrc1; pcPadTemp = pcPad1; @@ -1566,7 +1953,7 @@ uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, cDistParam.useMR = false; m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1); uint64_t uiCost = cDistParam.distFunc(cDistParam); - return uiCost; + return uiCost>>1; } void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray) @@ -1598,8 +1985,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]); /*Clip the starting MVs*/ - clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); - clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); + if( !pu.cs->sps->getWrapAroundEnabledFlag() ) + { + clipMv( mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); + clipMv( mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps ); + } /*L0 MC for refinement*/ { @@ -1610,11 +2000,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) offset += (-(int)DMVR_NUM_ITERATION); PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y]; PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0, - (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); + m_biLinearBufStride + , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); - 
xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y], - false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride - ); + xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->unscaledPic, mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y], + false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride ); } /*L1 MC for refinement*/ @@ -1626,11 +2016,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) offset += (-(int)DMVR_NUM_ITERATION); PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y]; PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1, - (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); + m_biLinearBufStride + , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION))); - xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y], - false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride - ); + xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->unscaledPic, mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y], + false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride ); } } @@ -1647,40 +2037,101 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con int dy = 
std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT); int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH); - /*L0 Padding*/ - m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ? - PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) : - PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()), - PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr()))); - - xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0); + Position puPos = pu.lumaPos(); - /*L1 Padding*/ - m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ? - PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) : - PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()), - PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr()))); + int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd; - xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1); + int bioEnabledThres = 2 * dy * dx; + bool bioAppliedType[MAX_NUM_SUBCU_DMVR]; - xinitMC(pu, clpRngs); +#if JVET_J0090_MEMORY_BANDWITH_MEASURE + JVET_J0090_SET_CACHE_ENABLE(true); + for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) + { + RefPicList refId = (RefPicList)k; + const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]); + for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++) + { + Mv cMv = pu.mv[refId]; + int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); + int filtersize = (compID == (COMPONENT_Y)) ? 
NTAPS_LUMA : NTAPS_CHROMA; + cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp)); + bool wrapRef = false; + if (pu.cs->sps->getWrapAroundEnabledFlag()) + { + wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps); + } + else + { + clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps); + } - // point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining - Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION; - Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION; + int width = pcYuvDst.bufs[compID].width + (filtersize - 1); + int height = pcYuvDst.bufs[compID].height + (filtersize - 1); - Position puPos = pu.lumaPos(); + CPelBuf refBuf; + Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); + refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef); - int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd; + JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID); + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col++) + { + JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__); + } + } + } + } + JVET_J0090_SET_CACHE_ENABLE(false); +#endif { int num = 0; + int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat); + int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat); + m_biLinearBufStride = (dx + (2 * DMVR_NUM_ITERATION)); + // point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining + Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION; + Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + 
DMVR_NUM_ITERATION; + + PredictionUnit subPu = pu; + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy))); + m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ? + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()), + PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr()))); + m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu)); + + m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ? + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()), + PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr()))); + m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu)); + + PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ? + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr()))); + PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ? 
+ PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) : + PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr()))); + + srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu)); + srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu)); + int yStart = 0; for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) { for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) { + PredictionUnit subPu = pu; + subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); + xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 1); + xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 1); + + xinitMC(subPu, clpRngs); + uint64_t minCost = MAX_UINT64; bool notZeroCost = true; int16_t totalDeltaMV[2] = { 0,0 }; @@ -1691,19 +2142,17 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con m_SADsArray[i] = MAX_UINT64; } pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1]; - - Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart; - Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart; for (int i = 0; i < iterationCount; i++) { deltaMV[0] = 0; deltaMV[1] = 0; - Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride); - Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride); + Pel *addrL0 = biLinearPredL0 + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride); + Pel *addrL1 = biLinearPredL1 - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride); if (i == 0) { minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy); - if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/)))) + minCost -= (minCost >>2); + if (minCost < (dx * dy)) { 
notZeroCost = false; break; @@ -1727,58 +2176,47 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]); } + bioAppliedType[num] = (minCost < bioEnabledThres) ? false : bioApplied; totalDeltaMV[0] = (totalDeltaMV[0] << mvShift); totalDeltaMV[1] = (totalDeltaMV[1] << mvShift); xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray); pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]); + PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu)); - num++; - } - } - } + bool blockMoved = false; + if (pu.mvdL0SubPu[num] != Mv(0, 0)) + { + blockMoved = true; + xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 0); + xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 0); + xPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0); + xPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1); + } - { - PredictionUnit subPu = pu; - subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy))); - PelUnitBuf m_cYuvRefBuffSubCuDMVRL0; - PelUnitBuf m_cYuvRefBuffSubCuDMVRL1; - PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ? - PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) : - PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr()))); - PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ? 
- PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) : - PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr()))); + int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride }; + subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num]; + subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num]; - srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu)); - srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu)); - PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu)); + subPu.mv[0].clipToStorageBitDepth(); + subPu.mv[1].clipToStorageBitDepth(); - int x = 0, y = 0; - int xStart = 0, yStart = 0; - int num = 0; + xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioAppliedType[num], mergeMv + , blockMoved + ); - int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride }; - for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy) - { - for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx) - { - subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy))); + subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y]; - subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num]; - subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num]; - m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu)); - m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu)); - xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioApplied, mergeMv); - - subPredBuf.bufs[COMPONENT_Y].buf = 
pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y]; - subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cb]); - subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cr]); - xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied); + subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]); + + subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]); + + xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]); num++; } } } + JVET_J0090_SET_CACHE_ENABLE(true); } #if JVET_J0090_MEMORY_BANDWITH_MEASURE void InterPrediction::cacheAssign( CacheModel *cache ) @@ -1789,4 +2227,241 @@ void InterPrediction::cacheAssign( CacheModel *cache ) } #endif -//! 
\} +void InterPrediction::xFillIBCBuffer(CodingUnit &cu) +{ + for (auto &currPU : CU::traverseTUs(cu)) + { + for (const CompArea &area : currPU.blocks) + { + if (!area.valid()) + continue; + + const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth(); + const int shiftSampleHor = ::getComponentScaleX(area.compID, cu.chromaFormat); + const int shiftSampleVer = ::getComponentScaleY(area.compID, cu.chromaFormat); + const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; + const int pux = area.x & ((m_IBCBufferWidth >> shiftSampleHor) - 1); + const int puy = area.y & (( 1 << ctuSizeLog2Ver ) - 1); + const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height)); + CPelBuf srcBuf = cu.cs->getRecoBuf(area); + PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea); + + dstBuf.copyFrom(srcBuf); + } + } +} + +void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID) +{ + const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); + const int shiftSampleHor = ::getComponentScaleX(compID, pu.chromaFormat); + const int shiftSampleVer = ::getComponentScaleY(compID, pu.chromaFormat); + const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; + pu.bv = pu.mv[REF_PIC_LIST_0]; + pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + int refx, refy; + if (compID == COMPONENT_Y) + { + refx = pu.Y().x + pu.bv.hor; + refy = pu.Y().y + pu.bv.ver; + } + else + {//Cb or Cr + refx = pu.Cb().x + (pu.bv.hor >> shiftSampleHor); + refy = pu.Cb().y + (pu.bv.ver >> shiftSampleVer); + } + refx &= ((m_IBCBufferWidth >> shiftSampleHor) - 1); + refy &= ((1 << ctuSizeLog2Ver) - 1); + + if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSampleHor)) + { + const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height)); + const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea); 
+ predBuf.bufs[compID].copyFrom(refBuf); + } + else + {//wrap around + int width = (m_IBCBufferWidth >> shiftSampleHor) - refx; + CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height)); + CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea); + PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); + dstBuf.copyFrom(srcBuf); + + width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSampleHor); + srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height)); + srcBuf = m_IBCBuffer.getBuf(srcArea); + dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); + dstBuf.copyFrom(srcBuf); + } +} + +void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize) +{ + const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize)); + m_IBCBuffer.getBuf(area).fill(-1); +} + +void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos) +{ + const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize)); + m_IBCBuffer.getBuf(area).fill(-1); +} + +bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv) +{ + if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize) + { + return false; + } + int refTLx = xCb + xBv; + int refTLy = (yCb + yBv) & (ctuSize - 1); + PelBuf buf = m_IBCBuffer.Y(); + for(int x = 0; x < width; x += 4) + { + for(int y = 0; y < height; y += 4) + { + if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false; + if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth 
- 1), y + refTLy) == -1) return false; + if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false; + if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false; + } + } + return true; +} + +bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf ) +{ + const ChromaFormat chFmt = blk.chromaFormat; + const ComponentID compID = blk.compID; + const bool rndRes = !bi; + + int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt ); + int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt ); + + int width = blk.width; + int height = blk.height; + CPelBuf refBuf; + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + const bool scaled = refPic->isRefScaled( &pps ); +#else + const bool scaled = scalingRatio != SCALE_1X; +#endif + + if( scaled ) + { + int row, col; + int refPicWidth = refPic->getPicWidthInLumaSamples(); + int refPicHeight = refPic->getPicHeightInLumaSamples(); + + int xFilter = filterIndex; + int yFilter = filterIndex; + const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; + const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4; + if( filterIndex == 0 ) + { + if( scalingRatio.first > rprThreshold2 ) + { + xFilter = 4; + } + else if( scalingRatio.first > rprThreshold1 ) + { + xFilter = 3; + } + + if( scalingRatio.second > rprThreshold2 ) + { + yFilter = 4; + } + else if( scalingRatio.second > rprThreshold1 ) + { + yFilter = 3; + } + } + + const int posShift = SCALE_RATIO_BITS - 4; + int stepX = ( scalingRatio.first + 8 ) >> 4; + int stepY = ( scalingRatio.second + 8 ) >> 4; + int64_t x0Int; + int64_t y0Int; + int offX = 1 << ( posShift - shiftHor - 1 ); + int offY = 1 << ( posShift - shiftVer - 1 ); + +#if 
JVET_Q0487_SCALING_WINDOW_ISSUES + const int64_t posX = ( ( blk.pos().x << ::getComponentScaleX( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> ::getComponentScaleX( compID, chFmt ); + const int64_t posY = ( ( blk.pos().y << ::getComponentScaleY( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) ) >> ::getComponentScaleY( compID, chFmt ); +#else + const int64_t posX = ( ( blk.pos().x << ::getComponentScaleX( compID, chFmt ) ) - pps.getScalingWindow().getWindowLeftOffset() ) >> ::getComponentScaleX( compID, chFmt ); + const int64_t posY = ( ( blk.pos().y << ::getComponentScaleY( compID, chFmt ) ) - pps.getScalingWindow().getWindowTopOffset() ) >> ::getComponentScaleY( compID, chFmt ); +#endif + + int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first ); + int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second ); + + x0Int = ( ( posX << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() ) * (int64_t)scalingRatio.first + addX; +#if JVET_Q0487_SCALING_WINDOW_ISSUES + x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) ); +#else + x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( refPic->getScalingWindow().getWindowLeftOffset() << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) ); +#endif + + y0Int = ( ( posY << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() ) * (int64_t)scalingRatio.second + addY; +#if 
JVET_Q0487_SCALING_WINDOW_ISSUES + y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - ::getComponentScaleY( compID, chFmt ) ) ) ); +#else + y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ) + ( refPic->getScalingWindow().getWindowTopOffset() << ( ( posShift - ::getComponentScaleY( compID, chFmt ) ) ) ); +#endif + + const int extSize = isLuma( compID ) ? 1 : 2; + int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA; + + int yInt0 = ( (int32_t)y0Int + offY ) >> posShift; + yInt0 = std::min( std::max( -(NTAPS_LUMA / 2), yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + + int xInt0 = ( (int32_t)x0Int + offX ) >> posShift; + xInt0 = std::min( std::max( -(NTAPS_LUMA / 2), xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + + int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1; + refHeight = std::max<int>( 1, refHeight ); + + CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 < refHeight + vFilterSize - 1 + extSize, "Buffer is not large enough, increase MAX_SCALING_RATIO" ); + + Pel buffer[( MAX_CU_SIZE + 16 ) * ( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 )]; + int tmpStride = width; + int xInt = 0, yInt = 0; + + for( col = 0; col < width; col++ ) + { + int posX = (int32_t)x0Int + col * stepX; + xInt = ( posX + offX ) >> posShift; + xInt = std::min( std::max( -(NTAPS_LUMA / 2), xInt ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 ); + + CHECK( xInt0 > xInt, "Wrong 
horizontal starting point" ); + + Position offset = Position( xInt, yInt0 ); + refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef ); + Pel* tempBuf = buffer + col; + + m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, xFilter, false, useAltHpelIf ); + } + + for( row = 0; row < height; row++ ) + { + int posY = (int32_t)y0Int + row * stepY; + yInt = ( posY + offY ) >> posShift; + yInt = std::min( std::max( -(NTAPS_LUMA / 2), yInt ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) ); + int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 ); + + CHECK( yInt0 > yInt, "Wrong vertical starting point" ); + + Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride; + + JVET_J0090_SET_CACHE_ENABLE( false ); + m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, yFilter, false, useAltHpelIf ); + JVET_J0090_SET_CACHE_ENABLE( true ); + } + } + + return scaled; +} diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 56b45faa2e72a52c99c526c191a492c71ad7687a..1bc16ff8d7b567029f5eaff532713e6f850cb822 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -63,17 +63,8 @@ class Mv; class InterPrediction : public WeightPrediction { private: - int m_shareState; - Distortion m_bioDistThres; - Distortion m_bioSubBlkDistThres; - Distortion m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU]; - int m_dotProduct1[BIO_TEMP_BUFFER_SIZE]; - int m_dotProduct2[BIO_TEMP_BUFFER_SIZE]; - int m_dotProduct3[BIO_TEMP_BUFFER_SIZE]; - int m_dotProduct5[BIO_TEMP_BUFFER_SIZE]; - int m_dotProduct6[BIO_TEMP_BUFFER_SIZE]; protected: InterpolationFilter m_if; @@ -108,23 +99,32 @@ protected: Mv(-2, 2), Mv(-1, 2), Mv(0, 2), Mv(1, 2), Mv(2, 2) }; uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)]; + Pel m_gradBuf[2][(AFFINE_MIN_BLOCK_SIZE + 2) * (AFFINE_MIN_BLOCK_SIZE + 2)]; + int m_dMvBuf[2][16 * 2]; + bool m_skipPROF; + bool m_encOnly; + bool m_isBi; + Pel* m_gradX0; Pel* m_gradY0; Pel* m_gradX1; Pel* m_gradY1; bool m_subPuMC; + int m_IBCBufferWidth; + PelStorage m_IBCBuffer; + void xIntraBlockCopy (PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID); int rightShiftMSB(int numer, int denom); void applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths); - bool xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* yuvSrc0, const int src0Stride, const Pel* yuvSrc1, const int src1Stride, const BitDepths &clipBitDepths); void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi , const bool& bioApplied , const bool luma, const bool chroma ); - void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred ); + void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, const bool luma = true, const bool chroma = true, PelUnitBuf* yuvPredTmp = NULL ); void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* 
refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng , const bool& bioApplied , bool isIBC + , const std::pair<int, int> scalingRatio = SCALE_1X , SizeType dmvrWidth = 0 , SizeType dmvrHeight = 0 , bool bilinearMC = false @@ -136,31 +136,33 @@ protected: void xBioGradFilter (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth); void xCalcBIOPar (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth); void xCalcBlkGradient (int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); - void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied ); - void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng ); - + void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL ); + void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X ); void xWeightedTriangleBlk ( const PredictionUnit 
&pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ); static bool xCheckIdenticalMotion( const PredictionUnit& pu ); - void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X); + void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, const bool luma = true, const bool chroma = true); + void xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, PelUnitBuf* yuvDstTmp = NULL); void destroy(); MotionInfo m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> (MIN_CU_LOG2 << 1)]; - void xChromaMC(PredictionUnit &pu, PelUnitBuf& pcYuvPred); #if JVET_J0090_MEMORY_BANDWITH_MEASURE CacheModel *m_cacheModel; #endif + PelStorage m_colorTransResiBuf[3]; // 0-org; 1-act; 2-tmp + public: InterPrediction(); virtual ~InterPrediction(); - void init (RdCost* pcRdCost, ChromaFormat chromaFormatIDC); + void init (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize); // inter void motionCompensation (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X , const bool luma = true, const bool chroma = true + , PelUnitBuf* predBufWOBIO = NULL ); void motionCompensation (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X , const bool luma = true, const bool chroma = true @@ -171,9 +173,11 @@ public: void motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 ); void weightedTriangleBlk ( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ); - void xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId); + void xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma); + void xPad(PredictionUnit& pu, 
PelUnitBuf &pcPad, RefPicList refId); void xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied , const Mv startMV[NUM_REF_PIC_LIST_01] + , bool blockMoved ); void xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height); uint64_t xDMVRCost(int bitDepth, Pel* pRef, uint32_t refStride, const Pel* pOrg, uint32_t orgStride, int width, int height); @@ -183,10 +187,13 @@ public: #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ); #endif - void setShareState(int shareStateIn) {m_shareState = shareStateIn;} -#if ENABLE_SPLIT_PARALLELISM - int getShareState() const { return m_shareState; } -#endif + static bool isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType ); + void xFillIBCBuffer(CodingUnit &cu); + void resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize); + void resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos); + bool isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv); + + bool xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf = false ); }; //! \} diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp index 01d7c284636a06f205eb5f3c938e7661b6ee8197..5d02b6da2a1f5e7717645b6259936994b7497775 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -54,6 +54,25 @@ CacheModel* InterpolationFilter::m_cacheModel; // ==================================================================================================================== // Tables // ==================================================================================================================== +const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { 0, 0, 0, 64, 0, 0, 0, 0 }, + { 0, 1, -3, 63, 4, -2, 1, 0 }, + { 0, 1, -5, 62, 8, -3, 1, 0 }, + { 0, 2, -8, 60, 13, -4, 1, 0 }, + { 0, 3, -10, 58, 17, -5, 1, 0 }, //1/4 + { 0, 3, -11, 52, 26, -8, 2, 0 }, + { 0, 2, -9, 47, 31, -10, 3, 0 }, + { 0, 3, -11, 45, 34, -10, 3, 0 }, + { 0, 3, -11, 40, 40, -11, 3, 0 }, //1/2 + { 0, 3, -10, 34, 45, -11, 3, 0 }, + { 0, 3, -10, 31, 47, -9, 2, 0 }, + { 0, 2, -8, 26, 52, -11, 3, 0 }, + { 0, 1, -5, 17, 58, -10, 3, 0 }, //3/4 + { 0, 1, -4, 13, 60, -8, 2, 0 }, + { 0, 1, -3, 8, 62, -5, 1, 0 }, + { 0, 1, -2, 4, 63, -3, 1, 0 } +}; const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = { @@ -75,6 +94,49 @@ const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_S { 0, 1, -2, 4, 63, -3, 1, 0 } }; +// 1.5x +const TFilterCoeff InterpolationFilter::m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { -1, -5, 17, 42, 17, -5, -1, 0 }, + { 0, -5, 15, 41, 19, -5, -1, 0 }, + { 0, -5, 13, 40, 21, -4, -1, 0 }, + { 0, -5, 11, 39, 24, -4, -2, 1 }, + { 0, -5, 9, 38, 26, -3, -2, 1 }, + { 0, -5, 7, 38, 28, -2, -3, 1 }, + { 1, -5, 5, 36, 30, -1, -3, 1 }, + { 1, -4, 3, 35, 32, 0, -4, 1 }, + { 1, -4, 2, 33, 33, 2, -4, 1 }, + { 1, -4, 0, 32, 35, 3, -4, 1 }, + { 1, -3, -1, 30, 36, 5, -5, 1 }, + { 1, -3, -2, 28, 38, 7, -5, 0 }, + { 1, 
-2, -3, 26, 38, 9, -5, 0 }, + { 1, -2, -4, 24, 39, 11, -5, 0 }, + { 0, -1, -4, 21, 40, 13, -5, 0 }, + { 0, -1, -5, 19, 41, 15, -5, 0 } +}; + +// 2x +const TFilterCoeff InterpolationFilter::m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = +{ + { -4, 2, 20, 28, 20, 2, -4, 0 }, + { -4, 0, 19, 29, 21, 5, -4, -2 }, + { -4, -1, 18, 29, 22, 6, -4, -2 }, + { -4, -1, 16, 29, 23, 7, -4, -2 }, + { -4, -1, 16, 28, 24, 7, -4, -2 }, + { -4, -1, 14, 28, 25, 8, -4, -2 }, + { -3, -3, 14, 27, 26, 9, -3, -3 }, + { -3, -1, 12, 28, 25, 10, -4, -3 }, + { -3, -3, 11, 27, 27, 11, -3, -3 }, + { -3, -4, 10, 25, 28, 12, -1, -3 }, + { -3, -3, 9, 26, 27, 14, -3, -3 }, + { -2, -4, 8, 25, 28, 14, -1, -4 }, + { -2, -4, 7, 24, 28, 16, -1, -4 }, + { -2, -4, 7, 23, 29, 16, -1, -4 }, + { -2, -4, 6, 22, 29, 18, -1, -4 }, + { -2, -4, 5, 21, 29, 19, 0, -4 } +}; + +const TFilterCoeff InterpolationFilter::m_lumaAltHpelIFilter[NTAPS_LUMA] = { 0, 3, 9, 20, 20, 9, 3, 0 }; const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = { { 0, 64, 0, 0 }, @@ -111,6 +173,80 @@ const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILT { 0, 2, 63, -1 }, }; +//1.5x +const TFilterCoeff InterpolationFilter::m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = +{ + { 12, 40, 12, 0 }, + { 11, 40, 13, 0 }, + { 10, 40, 15, -1 }, + { 9, 40, 16, -1 }, + { 8, 40, 17, -1 }, + { 8, 39, 18, -1 }, + { 7, 39, 19, -1 }, + { 6, 38, 21, -1 }, + { 5, 38, 22, -1 }, + { 4, 38, 23, -1 }, + { 4, 37, 24, -1 }, + { 3, 36, 25, 0 }, + { 3, 35, 26, 0 }, + { 2, 34, 28, 0 }, + { 2, 33, 29, 0 }, + { 1, 33, 30, 0 }, + { 1, 31, 31, 1 }, + { 0, 30, 33, 1 }, + { 0, 29, 33, 2 }, + { 0, 28, 34, 2 }, + { 0, 26, 35, 3 }, + { 0, 25, 36, 3 }, + { -1, 24, 37, 4 }, + { -1, 23, 38, 4 }, + { -1, 22, 38, 5 }, + { -1, 21, 38, 6 }, + { -1, 19, 39, 7 }, + { -1, 18, 39, 8 }, + { -1, 17, 40, 8 }, + { -1, 16, 40, 9 }, + { 
-1, 15, 40, 10 }, + { 0, 13, 40, 11 }, +}; + +//2x +const TFilterCoeff InterpolationFilter::m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] = +{ + { 17, 30, 17, 0 }, + { 17, 30, 18, -1 }, + { 16, 30, 18, 0 }, + { 16, 30, 18, 0 }, + { 15, 30, 18, 1 }, + { 14, 30, 18, 2 }, + { 13, 29, 19, 3 }, + { 13, 29, 19, 3 }, + { 12, 29, 20, 3 }, + { 11, 28, 21, 4 }, + { 10, 28, 22, 4 }, + { 10, 27, 22, 5 }, + { 9, 27, 23, 5 }, + { 9, 26, 24, 5 }, + { 8, 26, 24, 6 }, + { 7, 26, 25, 6 }, + { 7, 25, 25, 7 }, + { 6, 25, 26, 7 }, + { 6, 24, 26, 8 }, + { 5, 24, 26, 9 }, + { 5, 23, 27, 9 }, + { 5, 22, 27, 10 }, + { 4, 22, 28, 10 }, + { 4, 21, 28, 11 }, + { 3, 20, 29, 12 }, + { 3, 19, 29, 13 }, + { 3, 19, 29, 13 }, + { 2, 18, 30, 14 }, + { 1, 18, 30, 15 }, + { 0, 18, 30, 16 }, + { 0, 18, 30, 16 }, + { -1, 18, 30, 17 } +}; + const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] = { { 64, 0, }, @@ -191,6 +327,7 @@ InterpolationFilter::InterpolationFilter() m_filterCopy[1][0] = filterCopy<true, false>; m_filterCopy[1][1] = filterCopy<true, true>; + m_weightedTriangleBlk = xWeightedTriangleBlk; } @@ -226,11 +363,7 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int { for (col = 0; col < width; col++) { -#if HM_JEM_CLIP_PEL dst[col] = src[col]; -#else - dst[col] = ClipPel( src[col], clpRng ); -#endif JVET_J0090_CACHE_ACCESS( &src[col], __FILE__, __LINE__ ); } @@ -559,29 +692,61 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr * \param fmt Chroma format * \param bitDepth Bit depth */ -void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR) +void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, 
int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf) { - if( frac == 0 ) + if( frac == 0 && nFilterIdx < 2 ) { - m_filterCopy[true][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); + m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); if( nFilterIdx == 1 ) { - filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); + filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); + } + else if( nFilterIdx == 2 ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); + } + else if( nFilterIdx == 3 ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); + } + else if( frac == 8 && useAltHpelIf ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); + } + else if( ( width == 4 && height == 4 ) || ( width == 4 && height == ( 4 + NTAPS_LUMA - 1 ) ) ) + { + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); } else { - filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR); + filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR ); + } } else { 
const uint32_t csx = getComponentScaleX( compID, fmt ); CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR); + if( nFilterIdx == 3 ) + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac << ( 1 - csx )], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac << ( 1 - csx )], biMCForDMVR ); + } + else + { + filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR ); + } } } @@ -602,32 +767,159 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i * \param fmt Chroma format * \param bitDepth Bit depth */ -void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR) +void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf) { - if( frac == 0 ) + if( frac == 0 && nFilterIdx < 2 ) { - m_filterCopy[isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); + m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR ); } else if( isLuma( compID ) ) { CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - if (nFilterIdx == 1) + if( nFilterIdx == 1 ) + { + 
filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR ); + } + else if( nFilterIdx == 2 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); + } + else if( nFilterIdx == 3 ) { - filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR); + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR1[frac], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR2[frac], biMCForDMVR ); + } + else if( frac == 8 && useAltHpelIf ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR ); + } + else if( width == 4 && height == 4 ) + { + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR ); } else { - filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR); + filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR ); } } else { const uint32_t csy = getComponentScaleY( compID, fmt ); CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" ); - filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << (1 - csy)], biMCForDMVR); + if( nFilterIdx == 3 ) + { + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR1[frac << ( 1 - csy )], biMCForDMVR ); + } + else if( nFilterIdx == 4 ) + 
{ + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR2[frac << ( 1 - csy )], biMCForDMVR ); + } + else + { + filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )], biMCForDMVR ); + } } } +void InterpolationFilter::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ) +{ + Pel* dst = predDst .get(compIdx).buf; + Pel* src0 = predSrc0.get(compIdx).buf; + Pel* src1 = predSrc1.get(compIdx).buf; + int32_t strideDst = predDst .get(compIdx).stride - width; + int32_t strideSrc0 = predSrc0.get(compIdx).stride - width; + int32_t strideSrc1 = predSrc1.get(compIdx).stride - width; + + const char log2WeightBase = 3; + const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx]; + const int32_t clipbd = clipRng.bd; + const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); + const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS; + const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; + const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); + int32_t stepX = 1 << getComponentScaleX(compIdx, pu.chromaFormat); + int32_t stepY = 1 << getComponentScaleY(compIdx, pu.chromaFormat); + + int32_t widthY = width << getComponentScaleX(compIdx, pu.chromaFormat); + int32_t heightY = height << getComponentScaleY(compIdx, pu.chromaFormat); + + int32_t ratioWH = (widthY > heightY) ? (widthY / heightY) : 1; + int32_t ratioHW = (widthY > heightY) ? 1 : (heightY / widthY); + + int32_t weightedLength = 7; + int32_t weightedStartPos = (splitDir == 0) ? 
(0 - (weightedLength >> 1) * ratioWH) : (widthY - ((weightedLength + 1) >> 1) * ratioWH); + int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1; + int32_t weightedPosoffset = ( splitDir == 0 ) ? ratioWH : -ratioWH; + + Pel tmpPelWeighted; + int32_t weightIdx; + int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd; + for (y = 0; y < heightY; y += ratioHW) + { + if (y % stepY != 0) + { + weightedStartPos += weightedPosoffset; + weightedEndPos += weightedPosoffset; + continue; + } + for (tmpY = ratioHW; tmpY > 0; tmpY -= stepY) + { + for (x = 0; x < weightedStartPos; x += stepX) + { + *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng ); + src0++; + src1++; + } + + tmpWeightedStart = std::max((int32_t)0, weightedStartPos); + tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(widthY - 1)); + weightIdx = 1; + if( weightedStartPos < 0 ) + { + weightIdx += abs(weightedStartPos) / ratioWH; + } + for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH ) + { + if (x % stepX != 0) + { + weightIdx++; + continue; + } + + for (tmpX = ratioWH; tmpX > 0; tmpX -= stepX) + { + tmpPelWeighted = Clip3(1, 7, weightIdx); + tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted; + *dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng ); + } + weightIdx ++; + } + + int32_t start = ((weightedEndPos + 1) % stepX != 0) ? (weightedEndPos + 2) : (weightedEndPos + 1); + for (x = start; x < widthY; x += stepX) + { + *dst++ = ClipPel( rightShift( (splitDir == 0 ? 
*src0 : *src1) + offsetDefault, shiftDefault ), clipRng ); + src0++; + src1++; + } + + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + } + weightedStartPos += weightedPosoffset; + weightedEndPos += weightedPosoffset; + } +} + +void InterpolationFilter::weightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) +{ + m_weightedTriangleBlk(pu, width, height, compIdx, splitDir, predDst, predSrc0, predSrc1); +} + /** * \brief turn on SIMD fuc * diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index 21f698e816e67198ffb6b671cda8bb869fdbb1e3..3c8422cc537fab474a4a4fb061b29c23320e1753 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,8 +55,16 @@ */ class InterpolationFilter { + static const TFilterCoeff m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; +public: static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps + static const TFilterCoeff m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 1.5x + static const TFilterCoeff m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x + static const TFilterCoeff m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 1.5x + static const TFilterCoeff m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 2x +private: + static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Luma filter taps static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps public: @@ -71,6 +79,8 @@ public: template<int N> void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR); + static void xWeightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); + void weightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool 
splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); protected: #if JVET_J0090_MEMORY_BANDWITH_MEASURE static CacheModel* m_cacheModel; @@ -81,6 +91,7 @@ public: void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR); void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR); + void( *m_weightedTriangleBlk )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); void initInterpolationFilter( bool enable ); #ifdef TARGET_SIMD_X86 @@ -88,8 +99,8 @@ public: template <X86_VEXT vext> void _initInterpolationFilterX86(); #endif - void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false); - void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false); + void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false); + void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const 
ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false); #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; } #endif diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 980b4b546e39b102ac13d102ad8f371057ce6199..07d2b0a0a5ad67b7f7a6586ee5db6334fb0c538b 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,64 +55,18 @@ // Tables // ==================================================================================================================== -const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] = +const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] = { - { // Luma - 20, // 1xn - 20, // 2xn - 20, // 4xn - 14, // 8xn - 2, // 16xn - 0, // 32xn - 0, // 64xn - 0, // 128xn - }, - { // Chroma - 40, // 1xn - 40, // 2xn - 40, // 4xn - 28, // 8xn - 4, // 16xn - 0, // 32xn - 0, // 64xn - 0, // 128xn - } + 24, // 1xn + 24, // 2xn + 24, // 4xn + 14, // 8xn + 2, // 16xn + 0, // 32xn + 0, // 64xn + 0 // 128xn }; -const TFilterCoeff g_intraGaussFilter[32][4] = { - { 16, 32, 16, 0 }, - { 15, 29, 17, 3 }, - { 15, 29, 17, 3 }, - { 14, 29, 18, 3 }, - { 13, 29, 18, 4 }, - { 13, 28, 19, 4 }, - { 13, 28, 19, 4 }, - { 12, 28, 20, 4 }, - { 11, 28, 20, 5 }, - { 11, 27, 21, 5 }, - { 10, 27, 22, 5 }, - { 9, 27, 22, 6 }, - { 9, 26, 23, 6 }, - { 9, 26, 23, 6 }, - { 8, 25, 24, 7 }, - { 8, 25, 24, 7 }, - { 8, 24, 24, 8 }, - { 7, 24, 25, 8 }, - { 7, 24, 25, 8 }, - { 6, 23, 26, 9 }, - { 6, 23, 26, 9 }, - { 6, 22, 27, 9 }, - { 5, 22, 
27, 10 }, - { 5, 21, 27, 11 }, - { 5, 20, 28, 11 }, - { 4, 20, 28, 12 }, - { 4, 19, 28, 13 }, - { 4, 19, 28, 13 }, - { 4, 18, 29, 13 }, - { 3, 18, 29, 14 }, - { 3, 17, 29, 15 }, - { 3, 17, 29, 15 } -}; // ==================================================================================================================== // Constructor / destructor / initialize @@ -122,13 +76,6 @@ IntraPrediction::IntraPrediction() : m_currChromaFormat( NUM_CHROMA_FORMAT ) { - for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) - { - for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++) - { - m_piYuvExt[ch][buf] = nullptr; - } - } for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t buf = 0; buf < 4; buf++) @@ -148,14 +95,6 @@ IntraPrediction::~IntraPrediction() void IntraPrediction::destroy() { - for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) - { - for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++) - { - delete[] m_piYuvExt[ch][buf]; - m_piYuvExt[ch][buf] = nullptr; - } - } for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) { for (uint32_t buf = 0; buf < 4; buf++) @@ -173,11 +112,6 @@ void IntraPrediction::destroy() void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY) { - // if it has been initialised before, but the chroma format has changed, release the memory and start again. 
- if (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] != nullptr && m_currChromaFormat != chromaFormatIDC) - { - destroy(); - } if (m_yuvExt2[COMPONENT_Y][0] != nullptr && m_currChromaFormat != chromaFormatIDC) { @@ -186,18 +120,6 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth m_currChromaFormat = chromaFormatIDC; - if (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] == nullptr) // check if first is null (in which case, nothing initialised yet) - { - m_iYuvExtSize = (MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX * 33) * (MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX * 33); - - for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++) - { - for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++) - { - m_piYuvExt[ch][buf] = new Pel[m_iYuvExtSize]; - } - } - } if (m_yuvExt2[COMPONENT_Y][0] == nullptr) // check if first is null (in which case, nothing initialised yet) { @@ -237,21 +159,21 @@ Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize ) const int width = dstSize.width; const int height = dstSize.height; const auto denom = (width == height) ? 
(width << 1) : std::max(width,height); - const auto divShift = g_aucLog2[denom]; + const auto divShift = floorLog2(denom); const auto divOffset = (denom >> 1); if ( width >= height ) { for( idx = 0; idx < width; idx++ ) { - sum += pSrc.at( 1 + idx, 0 ); + sum += pSrc.at(m_ipaParam.multiRefIndex + 1 + idx, 0); } } if ( width <= height ) { for( idx = 0; idx < height; idx++ ) { - sum += pSrc.at( 0, 1 + idx ); + sum += pSrc.at(m_ipaParam.multiRefIndex + 1 + idx, 1); } } @@ -259,141 +181,85 @@ Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize ) return dcVal; } - int IntraPrediction::getWideAngle( int width, int height, int predMode ) +int IntraPrediction::getWideAngle( int width, int height, int predMode ) +{ + if ( predMode > DC_IDX && predMode <= VDIA_IDX ) { - if ( predMode > DC_IDX && predMode <= VDIA_IDX ) + int modeShift[] = { 0, 6, 10, 12, 14, 15 }; + int deltaSize = abs(floorLog2(width) - floorLog2(height)); + if (width > height && predMode < 2 + modeShift[deltaSize]) { - int modeShift[] = { 0, 6, 10, 12, 14, 15 }; - int deltaSize = abs(g_aucLog2[width] - g_aucLog2[height]); - if (width > height && predMode < 2 + modeShift[deltaSize]) - { - predMode += (VDIA_IDX - 1); - } - else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize]) - { - predMode -= (VDIA_IDX - 1); - } + predMode += (VDIA_IDX - 1); + } + else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize]) + { + predMode -= (VDIA_IDX - 1); } - return predMode; } + return predMode; +} - void IntraPrediction::setReferenceArrayLengths( const CompArea &area ) - { - // set Top and Left reference samples length - const int width = area.width; - const int height = area.height; +void IntraPrediction::setReferenceArrayLengths( const CompArea &area ) +{ + // set Top and Left reference samples length + const int width = area.width; + const int height = area.height; - m_leftRefLength = (height << 1); - m_topRefLength = (width << 1); + m_leftRefLength = (height << 1); 
+ m_topRefLength = (width << 1); - } +} -void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu, const bool useFilteredPredSamples ) +void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu) { const ComponentID compID = MAP_CHROMA( compId ); const ChannelType channelType = toChannelType( compID ); const int iWidth = piPred.width; const int iHeight = piPred.height; - const Size cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); - const uint32_t uiDirMode = PU::getFinalIntraMode( pu, channelType ); - + CHECK(iWidth == 2, "Width of 2 is not supported"); + const uint32_t uiDirMode = isLuma( compId ) && pu.cu->bdpcmMode ? BDPCM_IDX : !isLuma(compId) && pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, channelType); - CHECK( g_aucLog2[iWidth] < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" ); - CHECK( g_aucLog2[iWidth] > 7, "Size not allowed" ); + CHECK( floorLog2(iWidth) < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" ); + CHECK( floorLog2(iWidth) > 7, "Size not allowed" ); - const int multiRefIdx = (compID == COMPONENT_Y) ? pu.multiRefIdx : 0; - const bool useISP = pu.cu->ispMode && isLuma( compID ); - const int whRatio = useISP ? std::max( unsigned( 1 ), cuSize.width / cuSize.height ) : std::max( 1, iWidth / iHeight ); - const int hwRatio = useISP ? 
std::max( unsigned( 1 ), cuSize.height / cuSize.width ) : std::max( 1, iHeight / iWidth ); - const int srcStride = m_topRefLength + 1 + (whRatio + 1) * multiRefIdx; - const int srcHStride = m_leftRefLength + 1 + (hwRatio + 1) * multiRefIdx; + const int srcStride = m_refBufferStride[compID]; + const int srcHStride = 2; - Pel *ptrSrc = getPredictorPtr(compID, useFilteredPredSamples); + const CPelBuf & srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride); const ClpRng& clpRng(pu.cu->cs->slice->clpRng(compID)); switch (uiDirMode) { - case(PLANAR_IDX): xPredIntraPlanar(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, *pu.cs->sps); break; - case(DC_IDX): xPredIntraDc(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, false); break; - case(2): - case(DIA_IDX): - case(VDIA_IDX): - if (getWideAngle(useISP ? cuSize.width : iWidth, useISP ? cuSize.height : iHeight, uiDirMode) == static_cast<int>(uiDirMode)) // check if uiDirMode is not wide-angle - { - xPredIntraAng(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, multiRefIdx, useFilteredPredSamples, useISP, cuSize ); - break; - } - default: xPredIntraAng(CPelBuf(getPredictorPtr(compID, false), srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, multiRefIdx, useFilteredPredSamples, useISP, cuSize); break; + case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred); break; + case(DC_IDX): xPredIntraDc(srcBuf, piPred, channelType, false); break; + case(BDPCM_IDX): xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? 
pu.cu->bdpcmMode : pu.cu->bdpcmModeChroma, clpRng); break; + default: xPredIntraAng(srcBuf, piPred, channelType, clpRng); break; } - bool pdpcCondition = (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX || uiDirMode == HOR_IDX || uiDirMode == VER_IDX); - if( pdpcCondition && multiRefIdx == 0 && !useISP ) + if (m_ipaParam.applyPDPC) { - const CPelBuf srcBuf = CPelBuf(ptrSrc, srcStride, srcStride); PelBuf dstBuf = piPred; - const int scale = ((g_aucLog2[iWidth] - 2 + g_aucLog2[iHeight] - 2 + 2) >> 2); + const int scale = ((floorLog2(iWidth) - 2 + floorLog2(iHeight) - 2 + 2) >> 2); CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31"); - if (uiDirMode == PLANAR_IDX) - { - for (int y = 0; y < iHeight; y++) - { - int wT = 32 >> std::min(31, ((y << 1) >> scale)); - const Pel left = srcBuf.at(0, y + 1); - for (int x = 0; x < iWidth; x++) - { - const Pel top = srcBuf.at(x + 1, 0); - int wL = 32 >> std::min(31, ((x << 1) >> scale)); - dstBuf.at(x, y) = ClipPel((wL * left + wT * top + (64 - wL - wT) * dstBuf.at(x, y) + 32) >> 6, clpRng); - } - } - } - else if (uiDirMode == DC_IDX) + if (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX) { - const Pel topLeft = srcBuf.at(0, 0); for (int y = 0; y < iHeight; y++) { - int wT = 32 >> std::min(31, ((y << 1) >> scale)); - const Pel left = srcBuf.at(0, y + 1); + const int wT = 32 >> std::min(31, ((y << 1) >> scale)); + const Pel left = srcBuf.at(y + 1, 1); for (int x = 0; x < iWidth; x++) { - const Pel top = srcBuf.at(x + 1, 0); - int wL = 32 >> std::min(31, ((x << 1) >> scale)); - int wTL = (wL >> 4) + (wT >> 4); - dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng); - } - } - } - else if (uiDirMode == HOR_IDX) - { - const Pel topLeft = srcBuf.at(0, 0); - for (int y = 0; y < iHeight; y++) - { - int wT = 32 >> std::min(31, ((y << 1) >> scale)); - for (int x = 0; x < iWidth; x++) - { - const Pel top = srcBuf.at(x + 1, 0); - int wTL = wT; - dstBuf.at(x, y) 
= ClipPel((wT * top - wTL * topLeft + (64 - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng); - } - } - } - else if (uiDirMode == VER_IDX) - { - const Pel topLeft = srcBuf.at(0, 0); - for (int y = 0; y < iHeight; y++) - { - const Pel left = srcBuf.at(0, y + 1); - for (int x = 0; x < iWidth; x++) - { - int wL = 32 >> std::min(31, ((x << 1) >> scale)); - int wTL = wL; - dstBuf.at(x, y) = ClipPel((wL * left - wTL * topLeft + (64 - wL + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng); + const int wL = 32 >> std::min(31, ((x << 1) >> scale)); + const Pel top = srcBuf.at(x + 1, 0); + const Pel val = dstBuf.at(x, y); + dstBuf.at(x, y) = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); } } } } } + void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir) { int iLumaStride = 0; @@ -405,8 +271,8 @@ void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred } else { - iLumaStride = MAX_CU_SIZE + 1; - Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea)); + iLumaStride = MAX_CU_SIZE + 1; + Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea)); } int a, b, iShift; xGetLMParameters(pu, compID, chromaArea, a, b, iShift); @@ -416,30 +282,17 @@ void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred piPred.linearTransform(a, iShift, b, true, pu.cs->slice->clpRng(compID)); } -void IntraPrediction::xFilterGroup(Pel* pMulDst[], int i, Pel const * const piSrc, int iRecStride, bool bAboveAvaillable, bool bLeftAvaillable) -{ - pMulDst[0][i] = (piSrc[1] + piSrc[iRecStride + 1] + 1) >> 1; - - pMulDst[1][i] = (piSrc[iRecStride] + piSrc[iRecStride + 1] + 1) >> 1; - - pMulDst[3][i] = (piSrc[0] + piSrc[1] + 1) >> 1; - - pMulDst[2][i] = (piSrc[0] + piSrc[1] + piSrc[iRecStride] + piSrc[iRecStride + 1] + 2) >> 2; - -} - - - /** Function for deriving planar intra prediction. 
This function derives the prediction samples for planar mode (intra coding). */ //NOTE: Bit-Limit - 24-bit source -void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps ) +void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst ) { const uint32_t width = pDst.width; const uint32_t height = pDst.height; - const uint32_t log2W = g_aucLog2[width < 2 ? 2 : width]; - const uint32_t log2H = g_aucLog2[height < 2 ? 2 : height]; + + const uint32_t log2W = floorLog2( width ); + const uint32_t log2H = floorLog2( height ); int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE]; const uint32_t offset = 1 << (log2W + log2H); @@ -452,7 +305,7 @@ void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const for( int k = 0; k < height + 1; k++ ) { - leftColumn[k] = pSrc.at( 0, k + 1 ); + leftColumn[k] = pSrc.at(k + 1, 1); } // Prepare intermediate variables used in interpolation @@ -488,55 +341,103 @@ void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const } } } - - - - void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter ) { const Pel dcval = xGetPredValDc( pSrc, pDst ); pDst.fill( dcval ); - -#if HEVC_USE_DC_PREDFILTERING - if( enableBoundaryFilter ) - { - xDCPredFiltering( pSrc, pDst, channelType ); - } -#endif } -#if HEVC_USE_DC_PREDFILTERING -/** Function for filtering intra DC predictor. This function performs filtering left and top edges of the prediction samples for DC mode (intra coding). 
- */ -void IntraPrediction::xDCPredFiltering(const CPelBuf &pSrc, PelBuf &pDst, const ChannelType &channelType) +// Function for initialization of intra prediction parameters +void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompArea area, const SPS& sps) { - uint32_t iWidth = pDst.width; - uint32_t iHeight = pDst.height; - int x, y; + const ComponentID compId = area.compID; + const ChannelType chType = toChannelType(compId); + + const bool useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( chType ); + + const Size cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); + const Size puSize = Size( area.width, area.height ); + const Size& blockSize = useISP ? cuSize : puSize; + const int dirMode = PU::getFinalIntraMode(pu, chType); + const int predMode = getWideAngle( blockSize.width, blockSize.height, dirMode ); + + m_ipaParam.isModeVer = predMode >= DIA_IDX; + m_ipaParam.multiRefIndex = isLuma (chType) ? pu.multiRefIdx : 0 ; + m_ipaParam.refFilterFlag = false; + m_ipaParam.interpolationFlag = false; + m_ipaParam.applyPDPC = ((puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) || !isLuma(compId)) && m_ipaParam.multiRefIndex == 0; - if (isLuma(channelType) && (iWidth <= MAXIMUM_INTRA_FILTERED_WIDTH) && (iHeight <= MAXIMUM_INTRA_FILTERED_HEIGHT)) + const int intraPredAngleMode = (m_ipaParam.isModeVer) ? 
predMode - VER_IDX : -(predMode - HOR_IDX); + + + int absAng = 0; + if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes { - //top-left - pDst.at(0, 0) = (Pel)((pSrc.at(1, 0) + pSrc.at(0, 1) + 2 * pDst.at(0, 0) + 2) >> 2); + static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; + static const int invAngTable[32] = { + 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, + 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 + }; // (512 * 32) / Angle + + const int absAngMode = abs(intraPredAngleMode); + const int signAng = intraPredAngleMode < 0 ? -1 : 1; + absAng = angTable [absAngMode]; - //top row (vertical filter) - for ( x = 1; x < iWidth; x++ ) + m_ipaParam.invAngle = invAngTable[absAngMode]; + m_ipaParam.intraPredAngle = signAng * absAng; + if (intraPredAngleMode < 0) { - pDst.at(x, 0) = (Pel)((pSrc.at(x + 1, 0) + 3 * pDst.at(x, 0) + 2) >> 2); + m_ipaParam.applyPDPC = false; } - - //left column (horizontal filter) - for ( y = 1; y < iHeight; y++ ) + else if (intraPredAngleMode > 0) { - pDst.at(0, y) = (Pel)((pSrc.at(0, y + 1) + 3 * pDst.at(0, y) + 2) >> 2); + const int sideSize = m_ipaParam.isModeVer ? 
puSize.height : puSize.width; + const int maxScale = 2; + + m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.invAngle - 2) - 8)); + m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0; } } - return; + // high level conditions and DC intra prediction + if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() + || !isLuma( chType ) + || useISP + || PU::isMIP( pu, chType ) + || m_ipaParam.multiRefIndex + || DC_IDX == dirMode + ) + { + } + else if ((isLuma(chType) && pu.cu->bdpcmMode) || (!isLuma(chType) && pu.cu->bdpcmModeChroma)) // BDPCM + { + m_ipaParam.refFilterFlag = false; + } + else if (dirMode == PLANAR_IDX) // Planar intra prediction + { + m_ipaParam.refFilterFlag = puSize.width * puSize.height > 32 ? true : false; + } + else if (!useISP)// HOR, VER and angular modes (MDIS) + { + bool filterFlag = false; + { + const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) ); + const int log2Size = ((floorLog2(puSize.width) + floorLog2(puSize.height)) >> 1); + CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" ); + filterFlag = (diff > m_aucIntraFilter[log2Size]); + } + + // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter + if (filterFlag) + { + const bool isRefFilter = isIntegerSlope(absAng); + CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" ); + m_ipaParam.refFilterFlag = isRefFilter; + m_ipaParam.interpolationFlag = !isRefFilter; + } + } } -#endif -// Function for deriving the angular Intra predictions /** Function for deriving the simplified angular intra predictions. * @@ -549,39 +450,16 @@ void IntraPrediction::xDCPredFiltering(const CPelBuf &pSrc, PelBuf &pDst, const * from the extended main reference. 
*/ //NOTE: Bit-Limit - 25-bit source -#if HEVC_USE_HOR_VER_PREDFILTERING -void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const bool bEnableEdgeFilters, const SPS& sps - , int multiRefIdx - , const bool enableBoundaryFilter ) -#else -void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps, - int multiRefIdx, - const bool useFilteredPredSamples , - const bool useISP, - const Size cuSize ) -#endif + +void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng) { int width =int(pDst.width); int height=int(pDst.height); - CHECK( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" ); - int predMode = useISP ? getWideAngle( cuSize.width, cuSize.height, dirMode ) : getWideAngle( width, height, dirMode ); - const bool bIsModeVer = predMode >= DIA_IDX; - const int intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX); - const int absAngMode = abs(intraPredAngleMode); - const int signAng = intraPredAngleMode < 0 ? 
-1 : 1; -#if HEVC_USE_HOR_VER_PREDFILTERING - const bool edgeFilter = bEnableEdgeFilters && isLuma(channelType) && (width <= MAXIMUM_INTRA_FILTERED_WIDTH) && (height <= MAXIMUM_INTRA_FILTERED_HEIGHT); -#endif - - // Set bitshifts and scale the angle parameter to block size - - static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; - static const int invAngTable[32] = { 0, 8192, 4096, 2731, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 356, 315, 282, 256, 234, 210, 182, 160, 144, 128, 112, 95, 80, 64, 48, 32, 24, 16, 8 }; // (256 * 32) / Angle - - int invAngle = invAngTable[absAngMode]; - int absAng = angTable [absAngMode]; - int intraPredAngle = signAng * absAng; + const bool bIsModeVer = m_ipaParam.isModeVer; + const int multiRefIdx = m_ipaParam.multiRefIndex; + const int intraPredAngle = m_ipaParam.intraPredAngle; + const int invAngle = m_ipaParam.invAngle; Pel* refMain; Pel* refSide; @@ -589,132 +467,134 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch Pel refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX]; Pel refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX]; - const int whRatio = useISP ? std::max( unsigned( 1 ), cuSize.width / cuSize.height ) : std::max( 1, width / height ); - const int hwRatio = useISP ? std::max( unsigned( 1 ), cuSize.height / cuSize.width ) : std::max( 1, height / width ); - // Initialize the Main and Left reference array. if (intraPredAngle < 0) { - auto width = int(pDst.width) +1; - auto height = int(pDst.height)+1; - auto lastIdx = (bIsModeVer ? width : height) + multiRefIdx; - auto firstIdx = ( ((bIsModeVer ? 
height : width) -1) * intraPredAngle ) >> 5; - for (int x = 0; x < width + 1 + multiRefIdx; x++) + for (int x = 0; x <= width + 1 + multiRefIdx; x++) { - refAbove[x + height - 1] = pSrc.at( x, 0 ); + refAbove[x + height] = pSrc.at(x, 0); } - for (int y = 0; y < height + 1 + multiRefIdx; y++) + for (int y = 0; y <= height + 1 + multiRefIdx; y++) { - refLeft[y + width - 1] = pSrc.at( 0, y ); + refLeft[y + width] = pSrc.at(y, 1); } - refMain = (bIsModeVer ? refAbove + height : refLeft + width ) - 1; - refSide = (bIsModeVer ? refLeft + width : refAbove + height) - 1; + refMain = bIsModeVer ? refAbove + height : refLeft + width; + refSide = bIsModeVer ? refLeft + width : refAbove + height; // Extend the Main reference to the left. - int invAngleSum = 128; // rounding for (shift by 8) - for( int k = -1; k > firstIdx; k-- ) + int sizeSide = bIsModeVer ? height : width; + for (int k = -sizeSide; k <= -1; k++) { - invAngleSum += invAngle; - refMain[k] = refSide[invAngleSum>>8]; + refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)]; } - refMain[lastIdx] = refMain[lastIdx-1]; - refMain[firstIdx] = refMain[firstIdx+1]; } else { - for (int x = 0; x < m_topRefLength + 1 + (whRatio + 1) * multiRefIdx; x++) + for (int x = 0; x <= m_topRefLength + multiRefIdx; x++) { - refAbove[x+1] = pSrc.at(x, 0); + refAbove[x] = pSrc.at(x, 0); } - for (int y = 0; y < m_leftRefLength + 1 + (hwRatio + 1) * multiRefIdx; y++) + for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++) { - refLeft[y+1] = pSrc.at(0, y); + refLeft[y] = pSrc.at(y, 1); } - refMain = bIsModeVer ? refAbove : refLeft ; - refSide = bIsModeVer ? refLeft : refAbove; - refMain++; - refSide++; - refMain[-1] = refMain[0]; - auto lastIdx = 1 + ((bIsModeVer) ? m_topRefLength + (whRatio + 1) * multiRefIdx : m_leftRefLength + (hwRatio + 1) * multiRefIdx); - refMain[lastIdx] = refMain[lastIdx-1]; + refMain = bIsModeVer ? refAbove : refLeft; + refSide = bIsModeVer ? 
refLeft : refAbove; + + // Extend main reference to right using replication + const int log2Ratio = floorLog2(width) - floorLog2(height); + const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); + const int maxIndex = (multiRefIdx << s) + 2; + const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength; + const Pel val = refMain[refLength + multiRefIdx]; + for (int z = 1; z <= maxIndex; z++) + { + refMain[refLength + multiRefIdx + z] = val; + } } // swap width/height if we are doing a horizontal mode: - Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE]; - const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE; - Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray; if (!bIsModeVer) { std::swap(width, height); } + Pel tempArray[MAX_CU_SIZE * MAX_CU_SIZE]; + const int dstStride = bIsModeVer ? pDst.stride : width; + Pel * pDstBuf = bIsModeVer ? pDst.buf : tempArray; // compensate for line offset in reference line buffers refMain += multiRefIdx; refSide += multiRefIdx; + Pel *pDsty = pDstBuf; + if( intraPredAngle == 0 ) // pure vertical or pure horizontal { for( int y = 0; y < height; y++ ) { for( int x = 0; x < width; x++ ) { - pDstBuf[y*dstStride + x] = refMain[x + 1]; + pDsty[x] = refMain[x + 1]; } - } -#if HEVC_USE_HOR_VER_PREDFILTERING - if (edgeFilter && multiRefIdx == 0) - { - for( int y = 0; y < height; y++ ) + + if (m_ipaParam.applyPDPC) { - pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ( ( refSide[y + 1] - refSide[0] ) >> 1 ), clpRng ); + const int scale = (floorLog2(width) + floorLog2(height) - 2) >> 2; + const Pel topLeft = refMain[0]; + const Pel left = refSide[1 + y]; + for (int x = 0; x < std::min(3 << scale, width); x++) + { + const int wL = 32 >> (2 * x >> scale); + const Pel val = pDsty[x]; + pDsty[x] = ClipPel(val + ((wL * (left - topLeft) + 32) >> 6), clpRng); + } } + + pDsty += dstStride; } -#endif } else { - Pel *pDsty=pDstBuf; for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += 
intraPredAngle, pDsty += dstStride) { const int deltaInt = deltaPos >> 5; - const int deltaFract = deltaPos & (32 - 1); + const int deltaFract = deltaPos & 31; - if (absAng != 0 && absAng != 32) + if ( !isIntegerSlope( abs(intraPredAngle) ) ) { if( isLuma(channelType) ) { - Pel p[4]; - const bool useCubicFilter = useISP ? ( width <= 8 ) : ( !useFilteredPredSamples || multiRefIdx > 0 ); - TFilterCoeff const * const f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : g_intraGaussFilter[deltaFract]; + const bool useCubicFilter = !m_ipaParam.interpolationFlag; - int refMainIndex = deltaInt + 1; + const TFilterCoeff intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)}; + const TFilterCoeff* const f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter; - for( int x = 0; x < width; x++, refMainIndex++ ) + for (int x = 0; x < width; x++) { - p[0] = refMain[refMainIndex - 1]; - p[1] = refMain[refMainIndex]; - p[2] = refMain[refMainIndex + 1]; - p[3] = f[3] != 0 ? 
refMain[refMainIndex + 2] : 0; + Pel p[4]; - pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6); + p[0] = refMain[deltaInt + x]; + p[1] = refMain[deltaInt + x + 1]; + p[2] = refMain[deltaInt + x + 2]; + p[3] = refMain[deltaInt + x + 3]; - if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping - { - pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng ); - } + Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32) >> 6; + + pDsty[x] = ClipPel(val, clpRng); // always clip even though not always needed } } else { // Do linear filtering - const Pel *pRM = refMain + deltaInt + 1; - int lastRefMainPel = *pRM++; - for( int x = 0; x < width; pRM++, x++ ) + for (int x = 0; x < width; x++) { - int thisRefMainPel = *pRM; - pDsty[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 ); - lastRefMainPel = thisRefMainPel; + Pel p[2]; + + p[0] = refMain[deltaInt + x + 1]; + p[1] = refMain[deltaInt + x + 2]; + + pDsty[x] = p[0] + ((deltaFract * (p[1] - p[0]) + 16) >> 5); } } } @@ -726,61 +606,21 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch pDsty[x] = refMain[x + deltaInt + 1]; } } - const int numModes = 8; - const int scale = ((g_aucLog2[width] - 2 + g_aucLog2[height] - 2 + 2) >> 2); - CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31"); - if( !useISP ) + if (m_ipaParam.applyPDPC) { - if ((predMode == 2 || predMode == VDIA_IDX) && multiRefIdx == 0) - { - int wT = 16 >> std::min(31, ((y << 1) >> scale)); - - for (int x = 0; x < width; x++) - { - int wL = 16 >> std::min(31, ((x << 1) >> scale)); - if (wT + wL == 0) break; + const int scale = m_ipaParam.angularScale; + int invAngleSum = 256; - int c = x + y + 1; - if (c >= 2 * height) { wL = 0; } - if (c >= 2 * width) { wT = 0; } - const Pel left 
= (wL != 0) ? refSide[c + 1] : 0; - const Pel top = (wT != 0) ? refMain[c + 1] : 0; - - pDsty[x] = ClipPel((wL * left + wT * top + (64 - wL - wT) * pDsty[x] + 32) >> 6, clpRng); - } - } - else if (((predMode >= VDIA_IDX - numModes && predMode != VDIA_IDX) || (predMode != 2 && predMode <= (2 + numModes))) && multiRefIdx == 0) - { - int invAngleSum0 = 2; - for (int x = 0; x < width; x++) + for (int x = 0; x < std::min(3 << scale, width); x++) { - invAngleSum0 += invAngle; - int deltaPos0 = invAngleSum0 >> 2; - int deltaFrac0 = deltaPos0 & 63; - int deltaInt0 = deltaPos0 >> 6; - - int deltay = y + deltaInt0 + 1; - if (deltay >(bIsModeVer ? m_leftRefLength : m_topRefLength) - 1) break; - - int wL = 32 >> std::min(31, ((x << 1) >> scale)); - if (wL == 0) break; - Pel *p = refSide + deltay; + invAngleSum += invAngle; - Pel left = p[deltaFrac0 >> 5]; - pDsty[x] = ClipPel((wL * left + (64 - wL) * pDsty[x] + 32) >> 6, clpRng); + int wL = 32 >> (2 * x >> scale); + Pel left = refSide[y + (invAngleSum >> 9) + 1]; + pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6); } } - } - } -#if HEVC_USE_HOR_VER_PREDFILTERING - if( edgeFilter && absAng <= 1 ) - { - for( int y = 0; y < height; y++ ) - { - pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ((refSide[y + 1] - refSide[0]) >> 2), clpRng ); - } } -#endif } // Flip the block if this is the horizontal mode @@ -797,91 +637,81 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch } } - -bool IntraPrediction::useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode) +void IntraPrediction::xPredIntraBDPCM(const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ) { - return CU::isRDPCMEnabled(*pu.cu) && pu.cu->transQuantBypass && (uiDirMode == HOR_IDX || uiDirMode == VER_IDX); + const int wdt = pDst.width; + const int hgt = pDst.height; + + const int strideP = pDst.stride; + const int strideS = pSrc.stride; + + CHECK( !( dirMode == 1 || 
dirMode == 2 ), "Incorrect BDPCM mode parameter." ); + + Pel* pred = &pDst.buf[0]; + if( dirMode == 1 ) + { + Pel val; + for( int y = 0; y < hgt; y++ ) + { + val = pSrc.buf[(y + 1) + strideS]; + for( int x = 0; x < wdt; x++ ) + { + pred[x] = val; + } + pred += strideP; + } + } + else + { + for( int y = 0; y < hgt; y++ ) + { + for( int x = 0; x < wdt; x++ ) + { + pred[x] = pSrc.buf[x + 1]; + } + pred += strideP; + } + } } void IntraPrediction::geneWeightedPred(const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf) { const int width = pred.width; + CHECK(width == 2, "Width of 2 is not supported"); const int height = pred.height; const int srcStride = width; const int dstStride = pred.stride; - const uint32_t dirMode = PU::getFinalIntraMode(pu, toChannelType(compId)); - const ClpRng& clpRng(pu.cu->cs->slice->clpRng(compId)); Pel* dstBuf = pred.buf; - int k, l; + int wIntra, wMerge; - bool modeDC = (dirMode <= DC_IDX); - Pel wIntra1 = 6, wInter1 = 2, wIntra2 = 5, wInter2 = 3, wIntra3 = 3, wInter3 = 5, wIntra4 = 2, wInter4 = 6; + const Position posBL = pu.Y().bottomLeft(); + const Position posTR = pu.Y().topRight(); + const PredictionUnit *neigh0 = pu.cs->getPURestricted(posBL.offset(-1, 0), pu, CHANNEL_TYPE_LUMA); + const PredictionUnit *neigh1 = pu.cs->getPURestricted(posTR.offset(0, -1), pu, CHANNEL_TYPE_LUMA); + bool isNeigh0Intra = neigh0 && (CU::isIntra(*neigh0->cu)); + bool isNeigh1Intra = neigh1 && (CU::isIntra(*neigh1->cu)); - if (modeDC || width < 4 || height < 4) + if (isNeigh0Intra && isNeigh1Intra) { - for (k = 0; k<height; k++) - { - for (l = 0; l<width; l++) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * 4) + (srcBuf[k*srcStride + l] * 4)) >> 3), clpRng); - } - } + wIntra = 3; wMerge = 1; } else { - if (dirMode <= DIA_IDX) + if (!isNeigh0Intra && !isNeigh1Intra) { - int interval = (width >> 2); - - for (k = 0; k<height; k++) - { - for (l = 0; l<width; l++) - { - if (l<interval) - { - dstBuf[k*dstStride + l] = 
ClipPel((((dstBuf[k*dstStride + l] * wInter1) + (srcBuf[k*srcStride + l] * wIntra1)) >> 3), clpRng); - } - else if (l >= interval && l < (2 * interval)) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter2) + (srcBuf[k*srcStride + l] * wIntra2)) >> 3), clpRng); - } - else if (l >= (interval * 2) && l < (3 * interval)) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter3) + (srcBuf[k*srcStride + l] * wIntra3)) >> 3), clpRng); - } - else - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter4) + (srcBuf[k*srcStride + l] * wIntra4)) >> 3), clpRng); - } - } - } + wIntra = 1; wMerge = 3; } else { - int interval = (height >> 2); - for (k = 0; k<height; k++) - { - for (l = 0; l<width; l++) - { - if (k<interval) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter1) + (srcBuf[k*srcStride + l] * wIntra1)) >> 3), clpRng); - } - else if (k >= interval && k < (2 * interval)) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter2) + (srcBuf[k*srcStride + l] * wIntra2)) >> 3), clpRng); - } - else if (k >= (interval * 2) && k < (3 * interval)) - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter3) + (srcBuf[k*srcStride + l] * wIntra3)) >> 3), clpRng); - } - else - { - dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter4) + (srcBuf[k*srcStride + l] * wIntra4)) >> 3), clpRng); - } - } - } + wIntra = 2; wMerge = 2; + } + } + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + dstBuf[y*dstStride + x] = (wMerge * dstBuf[y*dstStride + x] + wIntra * srcBuf[y*srcStride + x] + 2) >> 2; } } } @@ -900,25 +730,27 @@ void IntraPrediction::switchBuffer(const PredictionUnit &pu, ComponentID compID, void IntraPrediction::geneIntrainterPred(const CodingUnit &cu) { - if (!cu.firstPU->mhIntraFlag) + if (!cu.firstPU->ciipFlag) { return; } const PredictionUnit* pu = cu.firstPU; - bool isUseFilter = 
IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, *pu, true, *pu); - initIntraPatternChType(cu, pu->Y(), isUseFilter); - predIntraAng(COMPONENT_Y, cu.cs->getPredBuf(*pu).Y(), *pu, isUseFilter); - isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cb, *pu, true, *pu); - initIntraPatternChType(cu, pu->Cb(), isUseFilter); - predIntraAng(COMPONENT_Cb, cu.cs->getPredBuf(*pu).Cb(), *pu, isUseFilter); - isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cr, *pu, true, *pu); - initIntraPatternChType(cu, pu->Cr(), isUseFilter); - predIntraAng(COMPONENT_Cr, cu.cs->getPredBuf(*pu).Cr(), *pu, isUseFilter); + initIntraPatternChType(cu, pu->Y()); + predIntraAng(COMPONENT_Y, cu.cs->getPredBuf(*pu).Y(), *pu); + if (pu->chromaSize().width > 2) + { + initIntraPatternChType(cu, pu->Cb()); + predIntraAng(COMPONENT_Cb, cu.cs->getPredBuf(*pu).Cb(), *pu); + initIntraPatternChType(cu, pu->Cr()); + predIntraAng(COMPONENT_Cr, cu.cs->getPredBuf(*pu).Cr(), *pu); + } for (int currCompID = 0; currCompID < 3; currCompID++) { + if (pu->chromaSize().width <= 2 && currCompID > 0) + continue; ComponentID currCompID2 = (ComponentID)currCompID; PelBuf tmpBuf = currCompID == 0 ? cu.cs->getPredBuf(*pu).Y() : (currCompID == 1 ? 
cu.cs->getPredBuf(*pu).Cb() : cu.cs->getPredBuf(*pu).Cr()); switchBuffer(*pu, currCompID2, tmpBuf, getPredictorPtr2(currCompID2, 0)); @@ -931,26 +763,149 @@ inline int isLeftAvailable ( const CodingUnit &cu, const ChannelType &chT inline int isAboveRightAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags ); inline int isBelowLeftAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags ); -void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool bFilterRefSamples) +void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag) { + CHECK(area.width == 2, "Width of 2 is not supported"); const CodingStructure& cs = *cu.cs; - Pel *refBufUnfiltered = m_piYuvExt[area.compID][PRED_BUF_UNFILTERED]; - Pel *refBufFiltered = m_piYuvExt[area.compID][PRED_BUF_FILTERED]; + if (!forceRefFilterFlag) + { + initPredIntraParams(*cu.firstPU, area, *cs.sps); + } + + Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; + Pel *refBufFiltered = m_refBuffer[area.compID][PRED_BUF_FILTERED]; - setReferenceArrayLengths( cu.ispMode && isLuma( area.compID ) ? 
cu.blocks[area.compID] : area ); + setReferenceArrayLengths( area ); // ----- Step 1: unfiltered reference samples ----- xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu ); // ----- Step 2: filtered reference samples ----- - if( bFilterRefSamples ) + if( m_ipaParam.refFilterFlag || forceRefFilterFlag ) + { + xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.firstPU->multiRefIdx ); + } +} + +void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf, const bool forceRefFilterFlag) +{ + const CodingStructure& cs = *cu.cs; + + if (!forceRefFilterFlag) + { + initPredIntraParams(*cu.firstPU, area, *cs.sps); + } + + const Position posLT = area; + bool isLeftAvail = (cs.getCURestricted(posLT.offset(-1, 0), cu, CHANNEL_TYPE_LUMA) != NULL) && cs.isDecomp(posLT.offset(-1, 0), CHANNEL_TYPE_LUMA); + bool isAboveAvail = (cs.getCURestricted(posLT.offset(0, -1), cu, CHANNEL_TYPE_LUMA) != NULL) && cs.isDecomp(posLT.offset(0, -1), CHANNEL_TYPE_LUMA); + // ----- Step 1: unfiltered reference samples ----- + if (cu.blocks[area.compID].x == area.x && cu.blocks[area.compID].y == area.y) + { + Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; + // With the first subpartition all the CU reference samples are fetched at once in a single call to xFillReferenceSamples + if (cu.ispMode == HOR_INTRA_SUBPARTITIONS) + { + m_leftRefLength = cu.Y().height << 1; + m_topRefLength = cu.Y().width + area.width; + } + else //if (cu.ispMode == VER_INTRA_SUBPARTITIONS) + { + m_leftRefLength = cu.Y().height + area.height; + m_topRefLength = cu.Y().width << 1; + } + + + xFillReferenceSamples(cs.picture->getRecoBuf(cu.Y()), refBufUnfiltered, cu.Y(), cu); + + // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the current subpartition + m_topRefLength = cu.blocks[area.compID].width + area.width; + m_leftRefLength = 
cu.blocks[area.compID].height + area.height; + } + else + { + + m_topRefLength = cu.blocks[area.compID].width + area.width; + m_leftRefLength = cu.blocks[area.compID].height + area.height; + + const int predSizeHor = m_topRefLength; + const int predSizeVer = m_leftRefLength; + if (cu.ispMode == HOR_INTRA_SUBPARTITIONS) + { + Pel* src = recBuf.bufAt(0, -1); + Pel *ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID]; + if (isLeftAvail) + { + for (int i = 0; i <= 2 * cu.blocks[area.compID].height - area.height; i++) + { + ref[i] = ref[i + area.height]; + } + } + else + { + for (int i = 0; i <= predSizeVer; i++) + { + ref[i] = src[0]; + } + } + Pel *dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + 1; + dst[-1] = ref[0]; + for (int i = 0; i < area.width; i++) + { + dst[i] = src[i]; + } + Pel sample = src[area.width - 1]; + dst += area.width; + for (int i = 0; i < predSizeHor - area.width; i++) + { + dst[i] = sample; + } + } + else + { + Pel* src = recBuf.bufAt(-1, 0); + Pel *ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; + if (isAboveAvail) + { + for (int i = 0; i <= 2 * cu.blocks[area.compID].width - area.width; i++) + { + ref[i] = ref[i + area.width]; + } + } + else + { + for (int i = 0; i <= predSizeHor; i++) + { + ref[i] = src[0]; + } + } + Pel *dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID] + 1; + dst[-1] = ref[0]; + for (int i = 0; i < area.height; i++) + { + *dst = *src; + src += recBuf.stride; + dst++; + } + Pel sample = src[-recBuf.stride]; + for (int i = 0; i < predSizeVer - area.height; i++) + { + *dst = sample; + dst++; + } + + } + } + // ----- Step 2: filtered reference samples ----- + if (m_ipaParam.refFilterFlag || forceRefFilterFlag) { - xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps - , cu.firstPU->multiRefIdx - ); + Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; + Pel *refBufFiltered = 
m_refBuffer[area.compID][PRED_BUF_FILTERED]; + xFilterReferenceSamples(refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.firstPU->multiRefIdx); } } + void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu ) { const ChannelType chType = toChannelType( area.compID ); @@ -964,11 +919,8 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf const int tuHeight = area.height; const int predSize = m_topRefLength; const int predHSize = m_leftRefLength; - const int cuWidth = cu.blocks[area.compID].width; - const int cuHeight = cu.blocks[area.compID].height; - const int whRatio = cu.ispMode && isLuma(area.compID) ? std::max(1, cuWidth / cuHeight) : std::max(1, tuWidth / tuHeight); - const int hwRatio = cu.ispMode && isLuma(area.compID) ? std::max(1, cuHeight / cuWidth) : std::max(1, tuHeight / tuWidth); - const int predStride = predSize + 1 + (whRatio + 1) * multiRefIdx; + const int predStride = predSize + 1 + multiRefIdx; + m_refBufferStride[area.compID] = predStride; const bool noShift = pcv.noChroma2x2 && area.width == 4; // don't shift on the lowest level (chroma not-split) const int unitWidth = tuWidth <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth : pcv.minCUWidth >> (noShift ? 
0 : getComponentScaleX(area.compID, sps.getChromaFormatIdc())); @@ -1002,7 +954,6 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits, unitHeight, (neighborFlags + totalLeftUnits - 1 - numLeftUnits) ); // ----- Step 2: fill reference samples (depending on neighborhood) ----- - CHECK((predHSize + 1) * predStride > m_iYuvExtSize, "Reference sample area not supported"); const Pel* srcBuf = recoBuf.buf; const int srcStride = recoBuf.stride; @@ -1015,15 +966,20 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf { // Fill border with DC value for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; } - for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = valueDC; } + for (int i = 0; i <= predHSize + multiRefIdx; i++) + { + ptrDst[i + predStride] = valueDC; + } } else if( numIntraNeighbor == totalUnits ) { // Fill top-left border and top and top right with rec. 
samples ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx); for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; } - ptrSrc = srcBuf - multiRefIdx * srcStride - (1 + multiRefIdx); - for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; } + for (int i = 0; i <= predHSize + multiRefIdx; i++) + { + ptrDst[i + predStride] = ptrSrc[i * srcStride]; + } } else // reference samples are partially available { @@ -1033,27 +989,28 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf if (neighborFlags[totalLeftUnits]) { ptrDst[0] = ptrSrc[0]; + ptrDst[predStride] = ptrSrc[0]; for (int i = 1; i <= multiRefIdx; i++) { ptrDst[i] = ptrSrc[i]; - ptrDst[i*predStride] = ptrSrc[i*srcStride]; + ptrDst[i + predStride] = ptrSrc[i * srcStride]; } } // Fill left & below-left samples if available (downwards) ptrSrc += (1 + multiRefIdx) * srcStride; - ptrDst += (1 + multiRefIdx) * predStride; + ptrDst += (1 + multiRefIdx) + predStride; for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--) { if (neighborFlags[unitIdx]) { for (int i = 0; i < unitHeight; i++) { - ptrDst[i*predStride] = ptrSrc[i*srcStride]; + ptrDst[i] = ptrSrc[i * srcStride]; } } ptrSrc += unitHeight * srcStride; - ptrDst += unitHeight * predStride; + ptrDst += unitHeight; } // Fill last below-left sample(s) if (neighborFlags[0]) @@ -1061,7 +1018,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf int lastSample = (predHSize % unitHeight == 0) ? 
unitHeight : predHSize % unitHeight; for (int i = 0; i < lastSample; i++) { - ptrDst[i*predStride] = ptrSrc[i*srcStride]; + ptrDst[i] = ptrSrc[i * srcStride]; } } @@ -1102,7 +1059,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf } // first available sample - int firstAvailRow = 0; + int firstAvailRow = -1; int firstAvailCol = 0; if (firstAvailUnit < totalLeftUnits) { @@ -1116,7 +1073,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf { firstAvailCol = (firstAvailUnit - totalLeftUnits - 1) * unitWidth + 1 + multiRefIdx; } - const Pel firstAvailSample = ptrDst[firstAvailCol + firstAvailRow * predStride]; + const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride]; // last sample below-left (n.a.) int lastRow = predHSize + multiRefIdx; @@ -1124,7 +1081,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf // fill left column for (int i = lastRow; i > firstAvailRow; i--) { - ptrDst[i*predStride] = firstAvailSample; + ptrDst[i + predStride] = firstAvailSample; } // fill top row if (firstAvailCol > 0) @@ -1144,7 +1101,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf if (!neighborFlags[currUnit]) // samples not available { // last available sample - int lastAvailRow = 0; + int lastAvailRow = -1; int lastAvailCol = 0; if (lastAvailUnit < totalLeftUnits) { @@ -1158,21 +1115,21 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf { lastAvailCol = (lastAvailUnit - totalLeftUnits) * unitWidth + multiRefIdx; } - const Pel lastAvailSample = ptrDst[lastAvailCol + lastAvailRow * predStride]; + const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? 
lastAvailCol : lastAvailRow + predStride]; // fill current unit with last available sample if (currUnit < totalLeftUnits) { for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--) { - ptrDst[i*predStride] = lastAvailSample; + ptrDst[i + predStride] = lastAvailSample; } } else if (currUnit == totalLeftUnits) { - for (int i = 1; i < multiRefIdx + 1; i++) + for (int i = 0; i < multiRefIdx + 1; i++) { - ptrDst[i*predStride] = lastAvailSample; + ptrDst[i + predStride] = lastAvailSample; } for (int j = 0; j < multiRefIdx + 1; j++) { @@ -1191,265 +1148,166 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf lastAvailUnit = currUnit; currUnit++; } -} - // padding of extended samples above right with the last sample - int lastSample = multiRefIdx + predSize; - for (int j = 1; j <= whRatio * multiRefIdx; j++) { ptrDst[lastSample + j] = ptrDst[lastSample]; } - // padding of extended samples below left with the last sample - lastSample = multiRefIdx + predHSize; - for (int i = 1; i <= hwRatio * multiRefIdx; i++) { ptrDst[(lastSample + i)*predStride] = ptrDst[lastSample*predStride]; } + } } -void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps - , int multiRefIdx +void IntraPrediction::xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area, + const SPS &sps, int multiRefIdx ) { if (area.compID != COMPONENT_Y) { multiRefIdx = 0; } - int whRatio = std::max(1, int(area.width / area.height)); - int hwRatio = std::max(1, int(area.height / area.width)); - const int predSize = m_topRefLength + (whRatio + 1) * multiRefIdx; - const int predHSize = m_leftRefLength + (hwRatio + 1) * multiRefIdx; - const int predStride = predSize + 1; - - -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - // Strong intra smoothing - ChannelType chType = toChannelType( area.compID ); - if( sps.getUseStrongIntraSmoothing() && 
isLuma( chType ) ) - { - const Pel bottomLeft = refBufUnfiltered[predStride * predHSize]; - const Pel topLeft = refBufUnfiltered[0]; - const Pel topRight = refBufUnfiltered[predSize]; - - const int threshold = 1 << (sps.getBitDepth( chType ) - 5); - const bool bilinearLeft = abs( (bottomLeft + topLeft) - (2 * refBufUnfiltered[predStride * tuHeight]) ) < threshold; //difference between the - const bool bilinearAbove = abs( (topLeft + topRight) - (2 * refBufUnfiltered[ tuWidth ]) ) < threshold; //ends and the middle - - if( tuWidth >= 32 && tuHeight >= 32 && bilinearLeft && bilinearAbove ) -#if !HEVC_USE_INTRA_SMOOTHING_T32 - if( tuWidth > 32 && tuHeight > 32 ) -#endif -#if !HEVC_USE_INTRA_SMOOTHING_T64 - if( tuWidth < 64 && tuHeight < 64 ) -#endif - { - Pel *piDestPtr = refBufFiltered + (predStride * predHSize); // bottom left - - // apply strong intra smoothing - for (int i = 0; i < predHSize; i++, piDestPtr -= predStride) //left column (bottom to top) - { - *piDestPtr = (((predHSize - i) * bottomLeft) + (i * topLeft) + predHSize / 2) / predHSize; - } - for( uint32_t i = 0; i <= predSize; i++, piDestPtr++ ) //full top row (left-to-right) - { - *piDestPtr = (((predSize - i) * topLeft) + (i * topRight) + predSize / 2) / predSize; - } + const int predSize = m_topRefLength + multiRefIdx; + const int predHSize = m_leftRefLength + multiRefIdx; + const size_t predStride = m_refBufferStride[area.compID]; - return; - } - } -#endif + const Pel topLeft = + (refBufUnfiltered[0] + refBufUnfiltered[1] + refBufUnfiltered[predStride] + refBufUnfiltered[predStride + 1] + 2) + >> 2; - // Regular reference sample filter - const Pel *piSrcPtr = refBufUnfiltered + (predStride * predHSize); // bottom left - Pel *piDestPtr = refBufFiltered + (predStride * predHSize); // bottom left + refBufFiltered[0] = topLeft; - // bottom left (not filtered) - *piDestPtr = *piSrcPtr; - piDestPtr -= predStride; - piSrcPtr -= predStride; - //left column (bottom to top) - for( int i = 1; i < predHSize; 
i++, piDestPtr -= predStride, piSrcPtr -= predStride) - { - *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[-predStride] + 2) >> 2; - } - //top-left - *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[1] + 2) >> 2; - piDestPtr++; - piSrcPtr++; - //top row (left-to-right) - for( uint32_t i=1; i < predSize; i++, piDestPtr++, piSrcPtr++ ) + for (int i = 1; i < predSize; i++) { - *piDestPtr = (piSrcPtr[1] + 2 * piSrcPtr[0] + piSrcPtr[-1] + 2) >> 2; + refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2; } - // top right (not filtered) - *piDestPtr=*piSrcPtr; -} - -bool IntraPrediction::useFilteredIntraRefSamples( const ComponentID &compID, const PredictionUnit &pu, bool modeSpecific, const UnitArea &tuArea ) -{ - const SPS &sps = *pu.cs->sps; - const ChannelType chType = toChannelType( compID ); - - // high level conditions - if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() ) { return false; } - if( !isLuma( chType ) && pu.chromaFormat != CHROMA_444 ) { return false; } - - if( pu.cu->ispMode && isLuma(compID) ) { return false; } + refBufFiltered[predSize] = refBufUnfiltered[predSize]; - if( !modeSpecific ) { return true; } + refBufFiltered += predStride; + refBufUnfiltered += predStride; - if (pu.multiRefIdx) { return false; } + refBufFiltered[0] = topLeft; - // pred. mode related conditions - const int dirMode = PU::getFinalIntraMode( pu, chType ); - int predMode = getWideAngle(tuArea.blocks[compID].width, tuArea.blocks[compID].height, dirMode); - if (predMode != dirMode ) { return true; } - if (dirMode == DC_IDX) { return false; } - if (dirMode == PLANAR_IDX) + for (int i = 1; i < predHSize; i++) { - return tuArea.blocks[compID].width * tuArea.blocks[compID].height > 32 ? 
true : false; + refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2; } - - int diff = std::min<int>( abs( dirMode - HOR_IDX ), abs( dirMode - VER_IDX ) ); - int log2Size = ((g_aucLog2[tuArea.blocks[compID].width] + g_aucLog2[tuArea.blocks[compID].height]) >> 1); - CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" ); - return (diff > m_aucIntraFilter[chType][log2Size]); + refBufFiltered[predHSize] = refBufUnfiltered[predHSize]; } - bool isAboveLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT) { const CodingStructure& cs = *cu.cs; const Position refPos = posLT.offset(-1, -1); - const CodingUnit* pcCUAboveLeft = cs.isDecomp( refPos, chType ) ? cs.getCURestricted( refPos, cu, chType ) : nullptr; - const bool isConstrained = cs.pps->getConstrainedIntraPred(); - bool bAboveLeftFlag; - if (isConstrained) - { - bAboveLeftFlag = pcCUAboveLeft && CU::isIntra(*pcCUAboveLeft); - } - else + if (!cs.isDecomp(refPos, chType)) { - bAboveLeftFlag = (pcCUAboveLeft ? true : false); + return false; } - return bAboveLeftFlag; + return (cs.getCURestricted(refPos, cu, chType) != NULL); } int isAboveAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *bValidFlags) { const CodingStructure& cs = *cu.cs; - const bool isConstrained = cs.pps->getConstrainedIntraPred(); - bool *pbValidFlags = bValidFlags; - int iNumIntra = 0; - int maxDx = uiNumUnitsInPU * unitWidth; - for (uint32_t dx = 0; dx < maxDx; dx += unitWidth) + bool * validFlags = bValidFlags; + int numIntra = 0; + const int maxDx = uiNumUnitsInPU * unitWidth; + + for (int dx = 0; dx < maxDx; dx += unitWidth) { const Position refPos = posLT.offset(dx, -1); - const CodingUnit* pcCUAbove = cs.isDecomp(refPos, chType) ? 
cs.getCURestricted(refPos, cu, chType) : nullptr; - - if( pcCUAbove && ( ( isConstrained && CU::isIntra( *pcCUAbove ) ) || !isConstrained ) ) - { - iNumIntra++; - *pbValidFlags = true; - } - else if( !pcCUAbove ) + if (!cs.isDecomp(refPos, chType)) { - return iNumIntra; + break; } - pbValidFlags++; + const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL); + numIntra += valid ? 1 : 0; + *validFlags = valid; + + validFlags++; } - return iNumIntra; + + return numIntra; } int isLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *bValidFlags) { const CodingStructure& cs = *cu.cs; - const bool isConstrained = cs.pps->getConstrainedIntraPred(); - bool *pbValidFlags = bValidFlags; - int iNumIntra = 0; - int maxDy = uiNumUnitsInPU * unitHeight; - for (uint32_t dy = 0; dy < maxDy; dy += unitHeight) + bool * validFlags = bValidFlags; + int numIntra = 0; + const int maxDy = uiNumUnitsInPU * unitHeight; + + for (int dy = 0; dy < maxDy; dy += unitHeight) { const Position refPos = posLT.offset(-1, dy); - const CodingUnit* pcCULeft = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr; - - if( pcCULeft && ( ( isConstrained && CU::isIntra( *pcCULeft ) ) || !isConstrained ) ) - { - iNumIntra++; - *pbValidFlags = true; - } - else if( !pcCULeft ) + if (!cs.isDecomp(refPos, chType)) { - return iNumIntra; + break; } - pbValidFlags--; // opposite direction + const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL); + numIntra += valid ? 
1 : 0; + *validFlags = valid; + + validFlags--; } - return iNumIntra; + return numIntra; } int isAboveRightAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *bValidFlags ) { const CodingStructure& cs = *cu.cs; - const bool isConstrained = cs.pps->getConstrainedIntraPred(); - bool *pbValidFlags = bValidFlags; - int iNumIntra = 0; - uint32_t maxDx = uiNumUnitsInPU * unitWidth; + bool * validFlags = bValidFlags; + int numIntra = 0; + const int maxDx = uiNumUnitsInPU * unitWidth; - for (uint32_t dx = 0; dx < maxDx; dx += unitWidth) + for (int dx = 0; dx < maxDx; dx += unitWidth) { const Position refPos = posRT.offset(unitWidth + dx, -1); - const CodingUnit* pcCUAbove = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr; - - if( pcCUAbove && ( ( isConstrained && CU::isIntra( *pcCUAbove ) ) || !isConstrained ) ) + if (!cs.isDecomp(refPos, chType)) { - iNumIntra++; - *pbValidFlags = true; - } - else if( !pcCUAbove ) - { - return iNumIntra; + break; } - pbValidFlags++; + const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL); + numIntra += valid ? 1 : 0; + *validFlags = valid; + + validFlags++; } - return iNumIntra; + return numIntra; } int isBelowLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *bValidFlags ) { const CodingStructure& cs = *cu.cs; - const bool isConstrained = cs.pps->getConstrainedIntraPred(); - bool *pbValidFlags = bValidFlags; - int iNumIntra = 0; - int maxDy = uiNumUnitsInPU * unitHeight; - for (uint32_t dy = 0; dy < maxDy; dy += unitHeight) + bool * validFlags = bValidFlags; + int numIntra = 0; + const int maxDy = uiNumUnitsInPU * unitHeight; + + for (int dy = 0; dy < maxDy; dy += unitHeight) { const Position refPos = posLB.offset(-1, unitHeight + dy); - const CodingUnit* pcCULeft = cs.isDecomp(refPos, chType) ? 
cs.getCURestricted(refPos, cu, chType) : nullptr; - - if( pcCULeft && ( ( isConstrained && CU::isIntra( *pcCULeft ) ) || !isConstrained ) ) - { - iNumIntra++; - *pbValidFlags = true; - } - else if ( !pcCULeft ) + if (!cs.isDecomp(refPos, chType)) { - return iNumIntra; + break; } - pbValidFlags--; // opposite direction + const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL); + numIntra += valid ? 1 : 0; + *validFlags = valid; + + validFlags--; } - return iNumIntra; + return numIntra; } + // LumaRecPixels void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chromaArea) { @@ -1463,15 +1321,14 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom } else { - iDstStride = MAX_CU_SIZE + 1; - pDst0 = m_piTemp + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES; + iDstStride = MAX_CU_SIZE + 1; + pDst0 = m_piTemp + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES; } //assert 420 chroma subsampling CompArea lumaArea = CompArea( COMPONENT_Y, pu.chromaFormat, chromaArea.lumaPos(), recalcSize( pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus) - - CHECK( lumaArea.width == chromaArea.width, "" ); - CHECK( lumaArea.height == chromaArea.height, "" ); + CHECK(lumaArea.width == chromaArea.width && CHROMA_444 != pu.chromaFormat, ""); + CHECK(lumaArea.height == chromaArea.height && CHROMA_444 != pu.chromaFormat && CHROMA_422 != pu.chromaFormat, ""); const SizeType uiCWidth = chromaArea.width; const SizeType uiCHeight = chromaArea.height; @@ -1479,7 +1336,11 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom const CPelBuf Src = pu.cs->picture->getRecoBuf( lumaArea ); Pel const* pRecSrc0 = Src.bufAt( 0, 0 ); int iRecStride = Src.stride; - int iRecStride2 = iRecStride << 1; + int logSubWidthC = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, pu.chromaFormat); + int logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, pu.chromaFormat); 
+ + int iRecStride2 = iRecStride << logSubHeightC; + const int mult = 1 << logSubWidthC ; const CodingUnit& lumaCU = isChroma( pu.chType ) ? *pu.cs->picture->cs->getCU( lumaArea.pos(), CH_L ) : *pu.cu; const CodingUnit& cu = *pu.cu; @@ -1492,13 +1353,14 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom int iBaseUnitSize = ( 1 << MIN_CU_LOG2 ); const int iUnitWidth = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat ); - const int iUnitHeight = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat ); - const int iTUWidthInUnits = uiTuWidth / iUnitWidth; + const int iUnitHeight = iBaseUnitSize >> getComponentScaleY(area.compID, area.chromaFormat); + + const int iTUWidthInUnits = uiTuWidth / iUnitWidth; const int iTUHeightInUnits = uiTuHeight / iUnitHeight; const int iAboveUnits = iTUWidthInUnits; const int iLeftUnits = iTUHeightInUnits; const int chromaUnitWidth = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat); - const int chromaUnitHeight = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat); + const int chromaUnitHeight = iBaseUnitSize >> getComponentScaleY(COMPONENT_Cb, area.chromaFormat); const int topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H. const int leftTemplateSampNum = 2 * uiCHeight; assert(m_topRefLength >= topTemplateSampNum); @@ -1538,7 +1400,33 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom Pel* pDst = nullptr; Pel const* piSrc = nullptr; - bool isFirstRowOfCtu = ((pu.block(COMPONENT_Cb).y)&(((pu.cs->sps)->getMaxCUWidth() >> 1) - 1)) == 0; + bool isFirstRowOfCtu = ( lumaArea.y & ((pu.cs->sps)->getCTUSize() - 1) ) == 0; + const int strOffset = (CHROMA_444 == pu.chromaFormat) ? 
0 : iRecStride; + + int c0_2tap = 1, c1_2tap = 1, offset_2tap = 1, shift_2tap = 1; //sum = 2 + int c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4 + int c0_5tap = 1, c1_5tap = 4, c2_5tap = 1, c3_5tap = 1, c4_5tap = 1, offset_5tap = 4, shift_5tap = 3; //sum = 8 + int c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 2, c4_6tap = 1, c5_6tap = 1, offset_6tap = 4, shift_6tap = 3; //sum = 8 + + switch (pu.chromaFormat) + { + case CHROMA_422: //overwrite filter coefficient values for 422 + c0_2tap = 1, c1_2tap = 0, offset_2tap = 0, shift_2tap = 0; //sum = 1 + c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4 + c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0, offset_5tap = 0, shift_5tap = 0; //sum = 1 + c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 2, shift_6tap = 2; //sum = 4 + break; + + case CHROMA_444: //overwrite filter coefficient values for 422 + c0_2tap = 1, c1_2tap = 0, offset_2tap = 0, shift_2tap = 0; //sum = 1 + c0_3tap = 1, c1_3tap = 0, c2_3tap = 0, offset_3tap = 0, shift_3tap = 0; //sum = 1 + c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0, offset_5tap = 0, shift_5tap = 0; //sum = 1 + c0_6tap = 1, c1_6tap = 0, c2_6tap = 0, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 0, shift_6tap = 0; //sum = 1 + break; + + default: + break; + } if( bAboveAvaillable ) { @@ -1554,44 +1442,44 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom { piSrc = pRecSrc0 - iRecStride; - if (i == 0 && !bLeftAvaillable) + if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC)) { - pDst[i] = piSrc[2 * i]; + pDst[i] = piSrc[mult * i]; } else { - pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + 2 ) >> 2; + pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - 1] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap; } } else if( 
pu.cs->sps->getCclmCollocatedChromaFlag() ) { piSrc = pRecSrc0 - iRecStride2; - if( i == 0 && !bLeftAvaillable ) + if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC)) { - pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - iRecStride] + piSrc[2 * i + iRecStride] + 2 ) >> 2; + pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - strOffset] * c1_3tap + piSrc[mult * i + strOffset] * c2_3tap + offset_3tap) >> shift_3tap; } else { - pDst[i] = ( piSrc[2 * i - iRecStride] - + piSrc[2 * i ] * 4 + piSrc[2 * i - 1] + piSrc[2 * i + 1] - + piSrc[2 * i + iRecStride] - + 4 ) >> 3; + pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap + + piSrc[mult * i] * c1_5tap + piSrc[mult * i - 1] * c2_5tap + piSrc[mult * i + 1] * c3_5tap + + piSrc[mult * i + strOffset] * c4_5tap + + offset_5tap) >> shift_5tap; } } else { piSrc = pRecSrc0 - iRecStride2; - if (i == 0 && !bLeftAvaillable) + if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC)) { - pDst[i] = ( piSrc[2 * i] + piSrc[2 * i + iRecStride] + 1 ) >> 1; + pDst[i] = (piSrc[mult * i] * c0_2tap + piSrc[mult * i + strOffset] * c1_2tap + offset_2tap) >> shift_2tap; } else { - pDst[i] = ( ( ( piSrc[2 * i ] * 2 ) + piSrc[2 * i - 1 ] + piSrc[2 * i + 1 ] ) - + ( ( piSrc[2 * i + iRecStride] * 2 ) + piSrc[2 * i - 1 + iRecStride] + piSrc[2 * i + 1 + iRecStride] ) - + 4 ) >> 3; + pDst[i] = ((piSrc[mult * i] * c0_6tap + piSrc[mult * i - 1] * c1_6tap + piSrc[mult * i + 1] * c2_6tap) + + (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i - 1 + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap) + + offset_6tap) >> shift_6tap; } } } @@ -1600,33 +1488,36 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom if( bLeftAvaillable ) { pDst = pDst0 - 1; - piSrc = pRecSrc0 - 3; + + piSrc = pRecSrc0 - 2 - logSubWidthC; + int addedLeftBelow = 0; if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX)) { addedLeftBelow = 
avaiLeftBelowUnits*chromaUnitHeight; } + for (int j = 0; j < uiCHeight + addedLeftBelow; j++) { if( pu.cs->sps->getCclmCollocatedChromaFlag() ) { - if( j == 0 && !bAboveAvaillable ) + if ((j == 0 && !bAboveAvaillable) || (j == uiCHeight + addedLeftBelow - 1 + logSubWidthC)) { - pDst[0] = ( piSrc[1] * 2 + piSrc[0] + piSrc[2] + 2 ) >> 2; + pDst[0] = ( piSrc[1] * c0_3tap + piSrc[0] * c1_3tap + piSrc[2] * c2_3tap + offset_3tap) >> shift_3tap; } else { - pDst[0] = ( piSrc[1 - iRecStride] - + piSrc[1 ] * 4 + piSrc[0] + piSrc[2] - + piSrc[1 + iRecStride] - + 4 ) >> 3; + pDst[0] = ( piSrc[1 - strOffset] * c0_5tap + + piSrc[1 ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap + + piSrc[1 + strOffset] * c4_5tap + + offset_5tap ) >> shift_5tap; } } else { - pDst[0] = ( ( piSrc[1 ] * 2 + piSrc[0 ] + piSrc[2 ] ) - + ( piSrc[1 + iRecStride] * 2 + piSrc[iRecStride] + piSrc[2 + iRecStride] ) - + 4 ) >> 3; + pDst[0] = ((piSrc[1] * c0_6tap + piSrc[0] * c1_6tap + piSrc[2] * c2_6tap) + + (piSrc[1 + strOffset] * c3_6tap + piSrc[strOffset] * c4_6tap + piSrc[2 + strOffset] * c5_6tap) + + offset_6tap) >> shift_6tap; } piSrc += iRecStride2; @@ -1634,7 +1525,6 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom } } - // inner part from reconstructed picture buffer for( int j = 0; j < uiCHeight; j++ ) { @@ -1644,38 +1534,47 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom { if( i == 0 && !bLeftAvaillable ) { - if( j == 0 && !bAboveAvaillable ) + if ( j == 0 && !bAboveAvaillable ) { - pDst0[i] = pRecSrc0[2 * i]; + pDst0[i] = pRecSrc0[mult * i]; } else { - pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - iRecStride] + pRecSrc0[2 * i + iRecStride] + 2 ) >> 2; + pDst0[i] = (pRecSrc0[mult * i] * c0_3tap + pRecSrc0[mult * i - strOffset] * c1_3tap + pRecSrc0[mult * i + strOffset] * c2_3tap + offset_3tap) >> shift_3tap; } } - else if( j == 0 && !bAboveAvaillable ) + else if ( j == 0 && !bAboveAvaillable ) { - pDst0[i] = ( 
pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + 2 ) >> 2; + pDst0[i] = (pRecSrc0[mult * i] * c0_3tap + pRecSrc0[mult * i - 1] * c1_3tap + pRecSrc0[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap; } else { - pDst0[i] = ( pRecSrc0[2 * i - iRecStride] - + pRecSrc0[2 * i ] * 4 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] - + pRecSrc0[2 * i + iRecStride] - + 4 ) >> 3; + pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap + + pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap + + pRecSrc0[mult * i + strOffset] * c4_5tap + + offset_5tap) >> shift_5tap; } } else { - if( i == 0 && !bLeftAvaillable ) + + if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth - 1 + logSubWidthC)) { - pDst0[i] = ( pRecSrc0[2 * i] + pRecSrc0[2 * i + iRecStride] + 1 ) >> 1; + pDst0[i] = (pRecSrc0[mult * i] * c0_2tap + pRecSrc0[mult * i + strOffset] * c1_2tap + offset_2tap) >> shift_2tap; } else { - pDst0[i] = ( pRecSrc0[2 * i ] * 2 + pRecSrc0[2 * i + 1 ] + pRecSrc0[2 * i - 1 ] - + pRecSrc0[2 * i + iRecStride] * 2 + pRecSrc0[2 * i + 1 + iRecStride] + pRecSrc0[2 * i - 1 + iRecStride] - + 4 ) >> 3; + int s = offset_6tap; + s += pRecSrc0[mult * i] * c0_6tap; + s += pRecSrc0[mult * i + 1] * c1_6tap; + s += pRecSrc0[mult * i - 1] * c2_6tap; + if (pu.chromaFormat == CHROMA_420) + { + s += pRecSrc0[mult * i + strOffset] * c3_6tap; + s += pRecSrc0[mult * i + 1 + strOffset] * c4_6tap; + s += pRecSrc0[mult * i - 1 + strOffset] * c5_6tap; + } + pDst0[i] = s >> shift_6tap; } } } @@ -1705,7 +1604,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component const int baseUnitSize = 1 << MIN_CU_LOG2; const int unitWidth = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat); - const int unitHeight = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat); + const int unitHeight = baseUnitSize >> getComponentScaleY(chromaArea.compID, nChromaFormat); const int tuWidthInUnits = tuWidth / 
unitWidth; const int tuHeightInUnits = tuHeight / unitHeight; @@ -1751,7 +1650,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component avaiAboveRightUnits = isAboveRightAvailable(cu, CHANNEL_TYPE_CHROMA, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1)); } Pel *srcColor0, *curChroma0; - int srcStride, curStride; + int srcStride; PelBuf temp; if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX)) @@ -1761,15 +1660,12 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component } else { - srcStride = MAX_CU_SIZE + 1; - temp = PelBuf(m_piTemp + srcStride + 1, srcStride, Size(chromaArea)); + srcStride = MAX_CU_SIZE + 1; + temp = PelBuf(m_piTemp + srcStride + 1, srcStride, Size(chromaArea)); } srcColor0 = temp.bufAt(0, 0); curChroma0 = getPredictorPtr(compID); - curStride = m_topRefLength + 1; - - curChroma0 += curStride + 1; unsigned internalBitDepth = sps.getBitDepth(CHANNEL_TYPE_CHROMA); @@ -1777,71 +1673,89 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component int maxLuma[2] = { -MAX_INT, 0 }; Pel *src = srcColor0 - srcStride; - Pel *cur = curChroma0 - curStride; - int minDim = 1; int actualTopTemplateSampNum = 0; int actualLeftTemplateSampNum = 0; if (curChromaMode == MDLM_T_IDX) { leftAvailable = 0; + avaiAboveRightUnits = avaiAboveRightUnits > (cHeight/unitWidth) ? cHeight/unitWidth : avaiAboveRightUnits; actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits); - minDim = actualTopTemplateSampNum; } else if (curChromaMode == MDLM_L_IDX) { aboveAvailable = 0; + avaiLeftBelowUnits = avaiLeftBelowUnits > (cWidth/unitHeight) ? 
cWidth/unitHeight : avaiLeftBelowUnits; actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits); - minDim = actualLeftTemplateSampNum; } else if (curChromaMode == LM_CHROMA_IDX) { actualTopTemplateSampNum = cWidth; actualLeftTemplateSampNum = cHeight; - minDim = leftAvailable && aboveAvailable ? 1 << g_aucPrevLog2[std::min(actualLeftTemplateSampNum, actualTopTemplateSampNum)] - : 1 << g_aucPrevLog2[leftAvailable ? actualLeftTemplateSampNum : actualTopTemplateSampNum]; } - int numSteps = minDim; + int startPos[2]; //0:Above, 1: Left + int pickStep[2]; + + int aboveIs4 = leftAvailable ? 0 : 1; + int leftIs4 = aboveAvailable ? 0 : 1; + + startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4); + pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4)); + + startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4); + pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4)); + Pel selectLumaPix[4] = { 0, 0, 0, 0 }; + Pel selectChromaPix[4] = { 0, 0, 0, 0 }; + + int cntT, cntL; + cntT = cntL = 0; + int cnt = 0; if (aboveAvailable) { - for (int j = 0; j < numSteps; j++) + cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1); + src = srcColor0 - srcStride; + const Pel *cur = curChroma0 + 1; + for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) { - int idx = (j * actualTopTemplateSampNum) / minDim; - - if (minLuma[0] > src[idx]) - { - minLuma[0] = src[idx]; - minLuma[1] = cur[idx]; - } - if (maxLuma[0] < src[idx]) - { - maxLuma[0] = src[idx]; - maxLuma[1] = cur[idx]; - } + selectLumaPix[cnt] = src[pos]; + selectChromaPix[cnt] = cur[pos]; } } if (leftAvailable) { + cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 ); src = srcColor0 - 1; - cur = curChroma0 - 1; - - for (int i = 0; i < numSteps; i++) + const Pel *cur = curChroma0 + m_refBufferStride[compID] + 1; + for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) { - int idx = (i * actualLeftTemplateSampNum) / 
minDim; - - if (minLuma[0] > src[srcStride * idx]) - { - minLuma[0] = src[srcStride * idx]; - minLuma[1] = cur[curStride * idx]; - } - if (maxLuma[0] < src[srcStride * idx]) - { - maxLuma[0] = src[srcStride * idx]; - maxLuma[1] = cur[curStride * idx]; - } + selectLumaPix[cnt + cntT] = src[pos * srcStride]; + selectChromaPix[cnt + cntT] = cur[pos]; } } + cnt = cntL + cntT; + + if (cnt == 2) + { + selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0]; + selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1]; + selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1]; + selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3]; + } + + int minGrpIdx[2] = { 0, 2 }; + int maxGrpIdx[2] = { 1, 3 }; + int *tmpMinGrp = minGrpIdx; + int *tmpMaxGrp = maxGrpIdx; + if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]); + if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]); + if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp); + if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]); + + minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1 )>>1; + minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; + maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1 )>>1; + maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; if (leftAvailable || aboveAvailable) { @@ -1862,7 +1776,8 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component int add = 1 << y >> 1; a = (diffC * v + add) >> y; iShift = 3 + x - y; - if ( iShift < 1 ) { + if ( iShift < 1 ) + { iShift = 1; a = ( (a == 0)? 0: (a < 0)? 
-15 : 15 ); // a=Sign(a)*15 } @@ -1885,4 +1800,108 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component } } +void IntraPrediction::initIntraMip( const PredictionUnit &pu, const CompArea &area ) +{ + CHECK( area.width > MIP_MAX_WIDTH || area.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" ); + + // prepare input (boundary) data for prediction + CHECK( m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP" ); + Pel *ptrSrc = getPredictorPtr( COMPONENT_Y ); + const int srcStride = m_refBufferStride[COMPONENT_Y]; + const int srcHStride = 2; + + m_matrixIntraPred.prepareInputForPred( CPelBuf( ptrSrc, srcStride, srcHStride ), area, pu.cu->slice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA ) ); +} + +void IntraPrediction::predIntraMip( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu ) +{ + CHECK( compId != COMPONENT_Y, "Error: chroma not supported" ); + CHECK( piPred.width > MIP_MAX_WIDTH || piPred.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" ); + CHECK( piPred.width != (1 << floorLog2(piPred.width)) || piPred.height != (1 << floorLog2(piPred.height)), "Error: expecting blocks of size 2^M x 2^N" ); + + // generate mode-specific prediction + const int bitDepth = pu.cu->slice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA ); + + static_vector<int, MIP_MAX_WIDTH* MIP_MAX_HEIGHT> predMip( piPred.width * piPred.height ); + m_matrixIntraPred.predBlock( predMip.data(), pu.intraDir[CHANNEL_TYPE_LUMA], pu.mipTransposedFlag, bitDepth ); + + for( int y = 0; y < piPred.height; y++ ) + { + for( int x = 0; x < piPred.width; x++ ) + { + piPred.at( x, y ) = Pel(predMip[y * piPred.width + x]); + } + } +} +void IntraPrediction::reorderPLT(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp) +{ + CodingUnit &cu = *cs.getCU(partitioner.chType); + + uint8_t reusePLTSizetmp = 0; + uint8_t pltSizetmp = 0; + Pel curPLTtmp[MAX_NUM_COMPONENT][MAXPLTSIZE]; + bool 
curPLTpred[MAXPLTPREDSIZE]; + + for (int idx = 0; idx < MAXPLTPREDSIZE; idx++) + { + curPLTpred[idx] = false; + cu.reuseflag[compBegin][idx] = false; + } + for (int idx = 0; idx < MAXPLTSIZE; idx++) + { + curPLTpred[idx] = false; + } + + for (int predidx = 0; predidx < cs.prevPLT.curPLTSize[compBegin]; predidx++) + { + bool match = false; + int curidx = 0; + + for (curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++) + { + bool matchTmp = true; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + matchTmp = matchTmp && (cu.curPLT[comp][curidx] == cs.prevPLT.curPLT[comp][predidx]); + } + if (matchTmp) + { + match = true; + break; + } + } + + if (match) + { + cu.reuseflag[compBegin][predidx] = true; + curPLTpred[curidx] = true; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + curPLTtmp[comp][reusePLTSizetmp] = cs.prevPLT.curPLT[comp][predidx]; + } + reusePLTSizetmp++; + pltSizetmp++; + } + } + cu.reusePLTSize[compBegin] = reusePLTSizetmp; + for (int curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++) + { + if (!curPLTpred[curidx]) + { + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + curPLTtmp[comp][pltSizetmp] = cu.curPLT[comp][curidx]; + } + pltSizetmp++; + } + } + assert(pltSizetmp == cu.curPLTSize[compBegin]); + for (int curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++) + { + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + cu.curPLT[comp][curidx] = curPLTtmp[comp][curidx]; + } + } +} //! \} diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 22a2af108d4bcd2c6549a2758e2d4b08322a3c69..d8a8f4a232ffe07dcb85d3512efa62d6b4c85f93 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,6 +44,7 @@ #include "Buffer.h" #include "Picture.h" +#include "MatrixIntraPrediction.h" //! \ingroup CommonLib //! \{ @@ -64,55 +65,80 @@ static const uint32_t MAX_INTRA_FILTER_DEPTHS=8; class IntraPrediction { -private: +protected: + Pel m_refBuffer[MAX_NUM_COMPONENT][NUM_PRED_BUF][(MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX) * 2]; + uint32_t m_refBufferStride[MAX_NUM_COMPONENT]; - Pel* m_piYuvExt[MAX_NUM_COMPONENT][NUM_PRED_BUF]; - int m_iYuvExtSize; +private: Pel* m_yuvExt2[MAX_NUM_COMPONENT][4]; int m_yuvExtSize2; - static const uint8_t m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS]; + static const uint8_t m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS]; + + struct IntraPredParam //parameters of Intra Prediction + { + bool refFilterFlag; + bool applyPDPC; + bool isModeVer; + int multiRefIndex; + int intraPredAngle; + int invAngle; + bool interpolationFlag; + int angularScale; + + // clang-format off + IntraPredParam() + : refFilterFlag(false) + , applyPDPC(false) + , isModeVer(false) + , multiRefIndex(-1) + , intraPredAngle(std::numeric_limits<int>::max()) + , invAngle(std::numeric_limits<int>::max()) + , interpolationFlag(false) + , angularScale(-1) + // clang-format on + { + } + }; + + IntraPredParam m_ipaParam; Pel* m_piTemp; Pel* m_pMdlmTemp; // for MDLM mode -protected: + MatrixIntraPrediction m_matrixIntraPred; + + +protected: ChromaFormat m_currChromaFormat; int m_topRefLength; int m_leftRefLength; + ScanElement* m_scanOrder; + bool m_bestScanRotationMode; // prediction - void xPredIntraPlanar ( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps ); - void xPredIntraDc ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter = true ); -#if HEVC_USE_HOR_VER_PREDFILTERING - void xPredIntraAng ( const CPelBuf &pSrc, PelBuf 
&pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const bool bEnableEdgeFilters, const SPS& sps - , int multiRefIdx - , const bool enableBoundaryFilter = true ); -#else - void xPredIntraAng ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps, - int multiRefIdx, - const bool useFilteredPredSamples, - const bool useISP = false, - const Size cuSize = Size( 0, 0 ) ); -#endif + void xPredIntraPlanar ( const CPelBuf &pSrc, PelBuf &pDst ); + void xPredIntraDc ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter = true ); + void xPredIntraAng ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng); + + void initPredIntraParams ( const PredictionUnit & pu, const CompArea compArea, const SPS& sps ); + + static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); } + + void xPredIntraBDPCM ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ); Pel xGetPredValDc ( const CPelBuf &pSrc, const Size &dstSize ); void xFillReferenceSamples ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu ); - void xFilterReferenceSamples ( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps - , int multiRefIdx + void xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area, const SPS &sps, + int multiRefIdx ); -#if HEVC_USE_DC_PREDFILTERING - // dc filtering - void xDCPredFiltering ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType &channelType ); -#endif static int getWideAngle ( int width, int height, int predMode ); void setReferenceArrayLengths ( const CompArea &area ); void destroy (); - void xFilterGroup ( Pel* pMulDst[], int i, Pel const* const piSrc, int iRecStride, bool bAboveAvaillable, bool bLeftAvaillable); void xGetLMParameters(const 
PredictionUnit &pu, const ComponentID compID, const CompArea& chromaArea, int& a, int& b, int& iShift); public: IntraPrediction(); @@ -121,21 +147,28 @@ public: void init (ChromaFormat chromaFormatIDC, const unsigned bitDepthY); // Angular Intra - void predIntraAng ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu, const bool useFilteredPredSamples ); - Pel* getPredictorPtr (const ComponentID compID, const bool bUseFilteredPredictions = false) { return m_piYuvExt[compID][bUseFilteredPredictions?PRED_BUF_FILTERED:PRED_BUF_UNFILTERED]; } + void predIntraAng ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu); + Pel *getPredictorPtr(const ComponentID compId) + { + return m_refBuffer[compId][m_ipaParam.refFilterFlag ? PRED_BUF_FILTERED : PRED_BUF_UNFILTERED]; + } + // Cross-component Chroma void predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir); void xGetLumaRecPixels(const PredictionUnit &pu, CompArea chromaArea); /// set parameters from CU data for accessing intra data - void initIntraPatternChType (const CodingUnit &cu, const CompArea &area, const bool bFilterRefSamples = false ); + void initIntraPatternChType (const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers + void initIntraPatternChTypeISP (const CodingUnit& cu, const CompArea& area, PelBuf& piReco, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers -static bool useFilteredIntraRefSamples( const ComponentID &compID, const PredictionUnit &pu, bool modeSpecific, const UnitArea &tuArea ); - static bool useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode); + // Matrix-based intra prediction + void initIntraMip (const PredictionUnit &pu, const CompArea &area); + void predIntraMip (const ComponentID compId, 
PelBuf &piPred, const PredictionUnit &pu); void geneWeightedPred (const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf); Pel* getPredictorPtr2 (const ComponentID compID, uint32_t idx) { return m_yuvExt2[compID][idx]; } void switchBuffer (const PredictionUnit &pu, ComponentID compID, PelBuf srcBuff, Pel *dst); void geneIntrainterPred (const CodingUnit &cu); + void reorderPLT (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp); }; //! \} diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp index 2cd1e89ce9fb82af3f2a73b8290912f958be06a9..f2ca851eae904fbfd67aee1d9ff870f6a0b0566f 100644 --- a/source/Lib/CommonLib/LoopFilter.cpp +++ b/source/Lib/CommonLib/LoopFilter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,12 +63,10 @@ // Tables // ==================================================================================================================== -const uint8_t LoopFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] = +const uint16_t LoopFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,5,5,6,6,7,8,9,10,11,13,14,16,18,20,22,25 - , 28, 31, 35, 39, 44, 50, 56, 63, 70, 79, 88, 99 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,4,4,4,5,5,5,5,7,7,8,9,10,10,11,13,14,15,17,19,21,24,25,29,33,36,41,45,51,57,64,71,80,89,100,112,125,141,157,177,198,222,250,280,314,352,395 }; - const uint8_t LoopFilter::sm_betaTable[MAX_QP + 1] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64 @@ -84,7 +82,6 @@ inline static uint32_t getRasterIdx(const Position& pos, 
const PreCalcValues& pc // utility functions // ==================================================================================================================== -#if HEVC_TILES_WPP static bool isAvailableLeft( const CodingUnit& cu, const CodingUnit& cu2, const bool bEnforceSliceRestriction, const bool bEnforceTileRestriction ) { return ( ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) ) && ( !bEnforceTileRestriction || CU::isSameTile( cu, cu2 ) ) ); @@ -94,12 +91,6 @@ static bool isAvailableAbove( const CodingUnit& cu, const CodingUnit& cu2, const { return ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) ) && ( !bEnforceTileRestriction || CU::isSameTile( cu, cu2 ) ); } -#else -static bool isAvailable( const CodingUnit& cu, const CodingUnit& cu2, const bool bEnforceSliceRestriction ) -{ - return ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) ); -} -#endif // ==================================================================================================================== @@ -155,6 +146,8 @@ void LoopFilter::loopFilterPic( CodingStructure& cs ) { const PreCalcValues& pcv = *cs.pcv; + m_shiftHor = ::getComponentScaleX( COMPONENT_Cb, cs.pcv->chrFormat ); + m_shiftVer = ::getComponentScaleY( COMPONENT_Cb, cs.pcv->chrFormat ); DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", cs.slice->getPOC() ) ) ); #if ENABLE_TRACING @@ -175,8 +168,15 @@ void LoopFilter::loopFilterPic( CodingStructure& cs { memset( m_aapucBS [EDGE_VER].data(), 0, m_aapucBS [EDGE_VER].byte_size() ); memset( m_aapbEdgeFilter[EDGE_VER].data(), false, m_aapbEdgeFilter[EDGE_VER].byte_size() ); + memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) ); + memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) ); + memset( m_transformEdge, false, sizeof(m_transformEdge) ); + m_ctuXLumaSamples = x << pcv.maxCUWidthLog2; + m_ctuYLumaSamples = y << pcv.maxCUHeightLog2; const UnitArea ctuArea( pcv.chrFormat, Area( x << pcv.maxCUWidthLog2, y << pcv.maxCUHeightLog2, 
pcv.maxCUWidth, pcv.maxCUWidth ) ); + CodingUnit* firstCU = cs.getCU( ctuArea.lumaPos(), CH_L); + cs.slice = firstCU->slice; // CU-based deblocking for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_L ), CH_L ) ) @@ -188,6 +188,9 @@ void LoopFilter::loopFilterPic( CodingStructure& cs { memset( m_aapucBS [EDGE_VER].data(), 0, m_aapucBS [EDGE_VER].byte_size() ); memset( m_aapbEdgeFilter[EDGE_VER].data(), false, m_aapbEdgeFilter[EDGE_VER].byte_size() ); + memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) ); + memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) ); + memset( m_transformEdge, false, sizeof(m_transformEdge) ); for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_C ), CH_C ) ) { @@ -204,8 +207,15 @@ void LoopFilter::loopFilterPic( CodingStructure& cs { memset( m_aapucBS [EDGE_HOR].data(), 0, m_aapucBS [EDGE_HOR].byte_size() ); memset( m_aapbEdgeFilter[EDGE_HOR].data(), false, m_aapbEdgeFilter[EDGE_HOR].byte_size() ); + memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) ); + memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) ); + memset( m_transformEdge, false, sizeof(m_transformEdge) ); + m_ctuXLumaSamples = x << pcv.maxCUWidthLog2; + m_ctuYLumaSamples = y << pcv.maxCUHeightLog2; const UnitArea ctuArea( pcv.chrFormat, Area( x << pcv.maxCUWidthLog2, y << pcv.maxCUHeightLog2, pcv.maxCUWidth, pcv.maxCUWidth ) ); + CodingUnit* firstCU = cs.getCU( ctuArea.lumaPos(), CH_L); + cs.slice = firstCU->slice; // CU-based deblocking for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_L ), CH_L ) ) @@ -217,6 +227,9 @@ void LoopFilter::loopFilterPic( CodingStructure& cs { memset( m_aapucBS [EDGE_HOR].data(), 0, m_aapucBS [EDGE_HOR].byte_size() ); memset( m_aapbEdgeFilter[EDGE_HOR].data(), false, m_aapbEdgeFilter[EDGE_HOR].byte_size() ); + memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) ); + memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) ); + memset( m_transformEdge, false, 
sizeof(m_transformEdge) ); for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_C ), CH_C ) ) { @@ -250,23 +263,49 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir ) const PreCalcValues& pcv = *cu.cs->pcv; const Area area = cu.Y().valid() ? cu.Y() : Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) ); + bool horEdgeFilter = false, verEdgeFilter = false; + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { 0, 0, 0 }; + int verVirBndryPos[] = { 0, 0, 0 }; + + bool isCuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries( area.x, area.y, area.width, area.height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cu.cs->picHeader ); + xSetLoopfilterParam( cu ); - bool implicitTU = false; - for( auto &currTU : CU::traverseTUs( cu ) ) + static_vector<int, 2*MAX_CU_SIZE> edgeIdx; + edgeIdx.clear(); + + if (m_enc) { - const Area& areaTu = cu.Y().valid() ? currTU.block( COMPONENT_Y ) : area; - const bool xOff = currTU.blocks[cu.chType].x != cu.blocks[cu.chType].x; - const bool yOff = currTU.blocks[cu.chType].y != cu.blocks[cu.chType].y; - if ((yOff != 0) && (edgeDir == EDGE_HOR)) + m_shiftHor = ::getComponentScaleX(COMPONENT_Cb, cu.chromaFormat); + m_shiftVer = ::getComponentScaleY(COMPONENT_Cb, cu.chromaFormat); + int x, y; + if (cu.Y().valid()) { - implicitTU = true; + x = cu.block(COMPONENT_Y).x; + y = cu.block(COMPONENT_Y).y; } - if ((xOff != 0) && (edgeDir == EDGE_VER)) + else + { + x = cu.block(COMPONENT_Cb).x << m_shiftHor; + y = cu.block(COMPONENT_Cb).y << m_shiftVer; + } + m_ctuXLumaSamples = x & ~(cu.slice->getSPS()->getMaxCUWidth() - 1); + m_ctuYLumaSamples = y & ~(cu.slice->getSPS()->getMaxCUHeight() - 1); + } + + for( auto &currTU : CU::traverseTUs( cu ) ) + { + const Area& areaTu = cu.Y().valid() ? 
currTU.block( COMPONENT_Y ) : area; + verEdgeFilter = m_stLFCUParam.internalEdge; + horEdgeFilter = m_stLFCUParam.internalEdge; + if( isCuCrossedByVirtualBoundaries ) { - implicitTU = true; + xDeriveEdgefilterParam( areaTu.x, areaTu.y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter ); } - xSetEdgefilterMultiple( cu, EDGE_VER, areaTu, m_stLFCUParam.internalEdge ); - xSetEdgefilterMultiple( cu, EDGE_HOR, areaTu, m_stLFCUParam.internalEdge ); + xSetEdgefilterMultiple( cu, EDGE_VER, areaTu, verEdgeFilter ); + xSetEdgefilterMultiple( cu, EDGE_HOR, areaTu, horEdgeFilter ); + xSetMaxFilterLengthPQFromTransformSizes( edgeDir, cu, currTU ); + edgeIdx.push_back( ( edgeDir == EDGE_HOR ) ? ( currTU.blocks[cu.chType].y - cu.blocks[cu.chType].y ) / 4 : ( currTU.blocks[cu.chType].x - cu.blocks[cu.chType].x ) / 4 ); } bool mvSubBlocks = false; @@ -277,8 +316,16 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir ) const bool xOff = currPU.blocks[cu.chType].x != cu.blocks[cu.chType].x; const bool yOff = currPU.blocks[cu.chType].y != cu.blocks[cu.chType].y; - xSetEdgefilterMultiple( cu, EDGE_VER, areaPu, (xOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.leftEdge), xOff ); - xSetEdgefilterMultiple( cu, EDGE_HOR, areaPu, (yOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.topEdge), yOff ); + verEdgeFilter = (xOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.leftEdge); + horEdgeFilter = (yOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.topEdge); + if( isCuCrossedByVirtualBoundaries ) + { + xDeriveEdgefilterParam( areaPu.x, areaPu.y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter ); + } + + xSetEdgefilterMultiple( cu, EDGE_VER, areaPu, verEdgeFilter, xOff ); + xSetEdgefilterMultiple( cu, EDGE_HOR, areaPu, horEdgeFilter, yOff ); + edgeIdx.push_back( ( edgeDir == EDGE_HOR ) ? 
( currPU.blocks[cu.chType].y - cu.blocks[cu.chType].y ) / 4 : ( currPU.blocks[cu.chType].x - cu.blocks[cu.chType].x ) / 4 ); if ((currPU.mergeFlag && (currPU.mergeType == MRG_TYPE_SUBPU_ATMVP)) || cu.affine) { @@ -288,7 +335,14 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir ) for (uint32_t off = subBlockSize; off < areaPu.height; off += subBlockSize) { const Area mvBlockH(cu.Y().x, cu.Y().y + off, cu.Y().width, pcv.minCUHeight); - xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, m_stLFCUParam.internalEdge, 1); + horEdgeFilter = m_stLFCUParam.internalEdge; + if( isCuCrossedByVirtualBoundaries ) + { + xDeriveEdgefilterParam( mvBlockH.x, mvBlockH.y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter ); + } + + xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, horEdgeFilter, 1); + edgeIdx.push_back( ( currPU.blocks[cu.chType].y + off - cu.blocks[cu.chType].y ) / 4 ); } } else @@ -296,34 +350,19 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir ) for (uint32_t off = subBlockSize; off < areaPu.width; off += subBlockSize) { const Area mvBlockV(cu.Y().x + off, cu.Y().y, pcv.minCUWidth, cu.Y().height); - xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, m_stLFCUParam.internalEdge, 1); + verEdgeFilter = m_stLFCUParam.internalEdge; + if( isCuCrossedByVirtualBoundaries ) + { + xDeriveEdgefilterParam( mvBlockV.x, mvBlockV.y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter ); + } + + xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, verEdgeFilter, 1); + edgeIdx.push_back( ( currPU.blocks[cu.chType].x + off - cu.blocks[cu.chType].x ) / 4 ); } } } - } - if (cu.firstPU->mhIntraFlag) - { - const uint32_t dirMode = PU::getFinalIntraMode(*(cu.firstPU), cu.chType); - if (edgeDir == EDGE_VER && dirMode == HOR_IDX) - { - mvSubBlocks = true; - subBlockSize = std::max(8u, (area.width >> 2)); - for (uint32_t off = subBlockSize; off < area.width; off += subBlockSize) - { - const Area 
mvBlockV(cu.Y().x + off, cu.Y().y, pcv.minCUWidth, cu.Y().height); - xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, m_stLFCUParam.internalEdge, 1); - } - } - else if (edgeDir == EDGE_HOR && dirMode == VER_IDX) - { - mvSubBlocks = true; - subBlockSize = std::max(8u, (area.height >> 2)); - for (uint32_t off = subBlockSize; off < area.height; off += subBlockSize) - { - const Area mvBlockH(cu.Y().x, cu.Y().y + off, cu.Y().width, pcv.minCUHeight); - xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, m_stLFCUParam.internalEdge, 1); - } - } + + xSetMaxFilterLengthPQForCodingSubBlocks( edgeDir, cu, currPU, mvSubBlocks, subBlockSize, areaPu ); } const unsigned uiPelsInPart = pcv.minCUWidth; @@ -343,124 +382,248 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir ) } } - if (edgeDir == EDGE_HOR) - { - if (!((cu.block(COMPONENT_Y).y % 8) == 0)) - return; - } - else - { - if (!((cu.block(COMPONENT_Y).x % 8) == 0)) - return; - } - unsigned int orthogonalLength = 1; - unsigned int orthogonalIncrement = 1; -#if FIX_DB_MAX_TRANSFORM_SIZE -#if MAX_TB_SIZE_SIGNALLING - const int maxTsize = cu.slice->getSPS()->getMaxTbSize(); -#else - const int maxTsize = MAX_TB_SIZEY; -#endif -#endif -#if FIX_DB_MAX_TRANSFORM_SIZE - int maxFilterLengthQ = 7; - int maxFilterLengthP = 7; - if (implicitTU && maxTsize < 32) - { - maxFilterLengthQ = 3; - maxFilterLengthP = 3; - } -#else - int maxFilterLength = 7; -#endif - if (cu.blocks[COMPONENT_Y].valid()) + std::sort( edgeIdx.begin(), edgeIdx.end() ); + int prevEdgeIdx = -1; + for ( const int& edge : edgeIdx ) { - if (mvSubBlocks) + if ( edge == prevEdgeIdx ) // skip duplicate edgeIdx marked by both transform and coding subblock processes { -#if FIX_DB_MAX_TRANSFORM_SIZE - maxFilterLengthQ = std::min(maxFilterLengthQ, 5); -#else - maxFilterLength = 5; -#endif - orthogonalIncrement = subBlockSize / 4; - orthogonalLength = (edgeDir == EDGE_HOR) ? 
cu.blocks[COMPONENT_Y].height / 4 : cu.blocks[COMPONENT_Y].width / 4; + continue; } -#if FIX_DB_MAX_TRANSFORM_SIZE - if ((cu.blocks[COMPONENT_Y].height > maxTsize) && (edgeDir == EDGE_HOR) && !mvSubBlocks) + prevEdgeIdx = edge; + + if ( cu.blocks[COMPONENT_Y].valid() ) { - orthogonalIncrement = maxTsize / 4; - orthogonalLength = cu.blocks[COMPONENT_Y].height / 4; + xEdgeFilterLuma( cu, edgeDir, edge ); } - if ((cu.blocks[COMPONENT_Y].width > maxTsize) && (edgeDir == EDGE_VER) && !mvSubBlocks) + if ( cu.blocks[COMPONENT_Cb].valid() && pcv.chrFormat != CHROMA_400 ) { - orthogonalIncrement = maxTsize / 4; - orthogonalLength = cu.blocks[COMPONENT_Y].width / 4; + if ( !cu.ispMode || edge == 0 ) + { + xEdgeFilterChroma( cu, edgeDir, edge ); + } + } + } +} +inline bool LoopFilter::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader ) +{ + numHorVirBndry = 0; numVerVirBndry = 0; + if (picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag()) + { + for (int i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++) + { + if (yPos <= picHeader->getVirtualBoundariesPosY(i) && picHeader->getVirtualBoundariesPosY(i) < yPos + height) + { + horVirBndryPos[numHorVirBndry++] = picHeader->getVirtualBoundariesPosY(i); + } } -#else - if ((cu.blocks[COMPONENT_Y].height > 64) && (edgeDir == EDGE_HOR) && !mvSubBlocks) + for (int i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++) { - orthogonalIncrement = 64 / 4; - orthogonalLength = cu.blocks[COMPONENT_Y].height / 4; + if (xPos <= picHeader->getVirtualBoundariesPosX(i) && picHeader->getVirtualBoundariesPosX(i) < xPos + width) + { + verVirBndryPos[numVerVirBndry++] = picHeader->getVirtualBoundariesPosX(i); + } } - if ((cu.blocks[COMPONENT_Y].width > 64) && (edgeDir == EDGE_VER) && !mvSubBlocks) + } + return numHorVirBndry > 0 || numVerVirBndry > 0; +} + +inline void 
LoopFilter::xDeriveEdgefilterParam( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter ) +{ + for (int i = 0; i < numVerVirBndry; i++) + { + if (verVirBndryPos[i] == xPos) { - orthogonalIncrement = 64 / 4; - orthogonalLength = cu.blocks[COMPONENT_Y].width / 4; + verEdgeFilter = false; + break; + } + } + for (int i = 0; i < numHorVirBndry; i++) + { + if (horVirBndryPos[i] == yPos) + { + horEdgeFilter = false; + break; } -#endif } +} - for (int edge = 0; edge < orthogonalLength; edge += orthogonalIncrement) +void LoopFilter::xSetMaxFilterLengthPQFromTransformSizes( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const TransformUnit& currTU ) +{ + const TransformUnit& tuQ = currTU; + + if ( edgeDir == EDGE_HOR ) { - if (cu.blocks[COMPONENT_Y].valid()) + for ( int cIdx = 0; cIdx < MAX_NUM_COMPONENT; cIdx++ ) // per component { - if (edge == 0) - { -#if FIX_DB_MAX_TRANSFORM_SIZE - xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLengthP, maxFilterLengthQ); -#else - xEdgeFilterLuma(cu, edgeDir, edge, 7, maxFilterLength); -#endif - } - else + const ComponentID comp = ComponentID(cIdx); + const ChannelType ch = toChannelType(comp); + const int shiftHor = ( ( ch == CH_L ) ? 0 : m_shiftHor ); + const int shiftVer = ( ( ch == CH_L ) ? 0 : m_shiftVer ); + const int ctuXOff = currTU.block(comp).x - ( m_ctuXLumaSamples >> shiftHor ); // x offset from left edge of CTU in respective channel sample units + const int ctuYOff = currTU.block(comp).y - ( m_ctuYLumaSamples >> shiftVer ); // y offset from top edge of CTU in respective channel sample units + const int minCUWidth = cu.cs->pcv->minCUWidth >> shiftHor; + if ( currTU.block(comp).valid() && ( ( currTU.block(comp).y == cu.block(comp).y ) ? 
m_stLFCUParam.topEdge : m_stLFCUParam.internalEdge ) ) // Edge deblocking needs to be recomputed since ISP contains whole CU chroma transforms in last TU of the CU { -#if FIX_DB_MAX_TRANSFORM_SIZE - if (implicitTU && ((edge % (maxTsize / 4)) == 0)) -#else - if ( implicitTU && (edge == (64 / 4)) ) -#endif + for ( int x = 0; x < currTU.blocks[cIdx].width; x += minCUWidth ) { -#if FIX_DB_MAX_TRANSFORM_SIZE - xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLengthQ, maxFilterLengthQ); -#else - xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLength, maxFilterLength); -#endif + const Position posQ = Position( currTU.blocks[ch].x + x, currTU.blocks[ch].y ); + const Position posP = posQ.offset( 0, -1 ); + const int sizeQSide = tuQ.block(comp).height; + const TransformUnit& tuP = *cu.cs->getTU( posP, ch ); + const int sizePSide = tuP.block(comp).height; + m_transformEdge[cIdx][ctuXOff+x][ctuYOff] = true; + + if ( comp == COMPONENT_Y ) + { + bool smallBlock = (sizePSide <= 4) || (sizeQSide <= 4); + if (smallBlock) + { + m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff] = 1; + m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff] = 1; + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff] = (sizeQSide >= 32) ? 7 : 3; + m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff] = (sizePSide >= 32) ? 7 : 3; + } + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1; + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 
3 : 1; + } } -#if FIX_DB_MAX_TRANSFORM_SIZE - else if ((edge == 2 || edge == (orthogonalLength - 2)) || (implicitTU && (((edge - 2) % ((maxTsize) / 4) == 0) || ((edge + 2) % ((maxTsize) / 4) == 0)))) -#else - else if ( (edge == 2 || edge == (orthogonalLength - 2)) || (implicitTU && (edge == (56 / 4) || edge == (72 / 4))) ) -#endif + } + } + } + if ( edgeDir == EDGE_VER ) + { + for ( int cIdx = 0; cIdx < MAX_NUM_COMPONENT; cIdx++ ) // per component + { + const ComponentID comp = ComponentID(cIdx); + const ChannelType ch = toChannelType(comp); + const int shiftHor = ( ( ch == CH_L ) ? 0 : m_shiftHor ); + const int shiftVer = ( ( ch == CH_L ) ? 0 : m_shiftVer ); + const int ctuXOff = currTU.block(comp).x - ( m_ctuXLumaSamples >> shiftHor ); // x offset from left edge of CTU in respective channel sample units + const int ctuYOff = currTU.block(comp).y - ( m_ctuYLumaSamples >> shiftVer ); // y offset from top edge of CTU in respective channel sample units + const int minCUHeight = cu.cs->pcv->minCUHeight >> shiftVer; + if ( currTU.block(comp).valid() && ( ( currTU.block(comp).x == cu.block(comp).x ) ? 
m_stLFCUParam.leftEdge : m_stLFCUParam.internalEdge ) ) // Edge deblocking needs to be recomputed since ISP contains whole CU chroma transforms in last TU of the CU + { + for ( int y = 0; y < currTU.blocks[cIdx].height; y += minCUHeight ) { - xEdgeFilterLuma(cu, edgeDir, edge, 2, 2); + const Position posQ = Position( currTU.blocks[ch].x, currTU.blocks[ch].y + y ); + const Position posP = posQ.offset( -1, 0 ); + const int sizeQSide = tuQ.block(comp).width; + const TransformUnit& tuP = *cu.cs->getTU( posP, ch ); + const int sizePSide = tuP.block(comp).width; + m_transformEdge[cIdx][ctuXOff][ctuYOff+y] = true; + + if ( comp == COMPONENT_Y ) + { + bool smallBlock = (sizePSide <= 4) || (sizeQSide <= 4); + if (smallBlock) + { + m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff + y] = 1; + m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff + y] = 1; + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff + y] = (sizeQSide >= 32) ? 7 : 3; + m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff + y] = (sizePSide >= 32) ? 7 : 3; + } + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff+y] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1; + m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff+y] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 
3 : 1; + } } - else + } + } + } +} + +void LoopFilter::xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu ) +{ + if ( mvSubBlocks && currPU.Y().valid() ) + { + const int cIdx = 0; + const ComponentID comp = ComponentID(cIdx); + const int ctuYOff = currPU.block(comp).y - m_ctuYLumaSamples; // y offset from top edge of CTU in luma samples + const int ctuXOff = currPU.block(comp).x - m_ctuXLumaSamples; // x offset from left edge of CTU in luma samples + const int minCUWidth = cu.cs->pcv->minCUWidth; + const int minCUHeight = cu.cs->pcv->minCUHeight; + if ( edgeDir == EDGE_HOR ) + { + for ( int y = 0; y < areaPu.height; y += subBlockSize ) + { + for ( int x = 0; x < areaPu.width; x += minCUWidth ) { - xEdgeFilterLuma(cu, edgeDir, edge, 3, 3); + if ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y] ) + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y], 5); + if ( y > 0 ) + { + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y], 5); + } + } + else if (y > 0 && (m_transformEdge[cIdx][ctuXOff + x][ctuYOff + y - 4] || ((y + 4) >= areaPu.height) || m_transformEdge[cIdx][ctuXOff + x][ctuYOff + y + 4])) // adjacent to transform edge +/- 4 + { + m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff + y] = 1; + m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff + y] = 1; + } + else if (y > 0 && ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y-8] || (( y + 8 ) >= areaPu.height) || m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y+8] )) // adjacent to transform edge on 8x8 grid + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 2; + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 2; + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 3; + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 3; + } } } } - if 
(cu.blocks[COMPONENT_Cb].valid() && pcv.chrFormat != CHROMA_400) + else // edgeDir == EDGE_VER { - xEdgeFilterChroma(cu, edgeDir, edge); + for ( int x = 0; x < areaPu.width; x += subBlockSize ) + { + for ( int y = 0; y < areaPu.height; y += minCUHeight ) + { + if ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y] ) + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y], 5); + if ( x > 0 ) + { + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y], 5); + } + } + else if (x > 0 && (m_transformEdge[cIdx][ctuXOff + x - 4][ctuYOff + y] || ((x + 4) >= areaPu.width) || m_transformEdge[cIdx][ctuXOff + x + 4][ctuYOff + y])) // adjacent to transform edge +/- 4 + { + m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff + y] = 1; + m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff + y] = 1; + } + else if ( x > 0 && ( m_transformEdge[cIdx][ctuXOff+x-8][ctuYOff+y] || ( (x + 8) >= areaPu.width ) || m_transformEdge[cIdx][ctuXOff+x+8][ctuYOff+y] ) ) // adjacent to transform edge on 8x8 grid + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 2; + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 2; + } + else + { + m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 3; + m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 3; + } + } + } } } } - void LoopFilter::xSetEdgefilterMultiple( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const Area& area, @@ -476,9 +639,16 @@ void LoopFilter::xSetEdgefilterMultiple( const CodingUnit& cu, for( int ui = 0; ui < uiNumElem; ui++ ) { m_aapbEdgeFilter[edgeDir][uiBsIdx] = bValue; - if( ! EdgeIdx ) + if ( m_aapucBS[edgeDir][uiBsIdx] && bValue ) { - m_aapucBS[edgeDir][uiBsIdx] = bValue; + m_aapucBS[edgeDir][uiBsIdx] = 3; // both the TU and PU edge + } + else + { + if( ! 
EdgeIdx ) + { + m_aapucBS[edgeDir][uiBsIdx] = bValue; + } } uiBsIdx += uiAdd; } @@ -486,9 +656,7 @@ void LoopFilter::xSetEdgefilterMultiple( const CodingUnit& cu, void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu ) { const Slice& slice = *cu.slice; -#if HEVC_TILES_WPP const PPS& pps = *cu.cs->pps; -#endif if( slice.getDeblockingFilterDisable() ) { @@ -499,14 +667,8 @@ void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu ) const Position& pos = cu.blocks[cu.chType].pos(); m_stLFCUParam.internalEdge = true; -#if HEVC_TILES_WPP - m_stLFCUParam.leftEdge = ( 0 < pos.x ) && isAvailableLeft ( cu, *cu.cs->getCU( pos.offset( -1, 0 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ); - m_stLFCUParam.topEdge = ( 0 < pos.y ) && isAvailableAbove( cu, *cu.cs->getCU( pos.offset( 0, -1 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ); -#else - m_stLFCUParam.leftEdge = ( 0 < pos.x ) && isAvailable ( cu, *cu.cs->getCU( pos.offset( -1, 0 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag()); - m_stLFCUParam.topEdge = ( 0 < pos.y ) && isAvailable ( cu, *cu.cs->getCU( pos.offset( 0, -1 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag()); -#endif - m_stLFCUParam.internalEdge &= !cu.ispMode; + m_stLFCUParam.leftEdge = ( 0 < pos.x ) && isAvailableLeft ( cu, *cu.cs->getCU( pos.offset( -1, 0 ), cu.chType ), !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ); + m_stLFCUParam.topEdge = ( 0 < pos.y ) && isAvailableAbove( cu, *cu.cs->getCU( pos.offset( 0, -1 ), cu.chType ), !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ); } unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const Position& localPos ) const @@ -523,17 +685,20 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De const CodingUnit& cuQ = cu; const CodingUnit& 
cuP = *cu.cs->getCU( posP, cu.chType ); + //-- Set BS for Intra MB : BS = 4 or 3 if( ( MODE_INTRA == cuP.predMode ) || ( MODE_INTRA == cuQ.predMode ) ) { - return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr)); + int bsY = (MODE_INTRA == cuP.predMode && cuP.bdpcmMode) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode) ? 0 : 2; + int bsC = (MODE_INTRA == cuP.predMode && cuP.bdpcmModeChroma) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmModeChroma) ? 0 : 2; + return (BsSet(bsY, COMPONENT_Y) + BsSet(bsC, COMPONENT_Cb) + BsSet(bsC, COMPONENT_Cr)); } const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType); - const TransformUnit& tuP = *cuP.cs->getTU(posP, cuP.chType); + const TransformUnit& tuP = *cuP.cs->getTU(posP, cuQ.chType); const PreCalcValues& pcv = *cu.cs->pcv; const unsigned rasterIdx = getRasterIdx( Position{ localPos.x, localPos.y }, pcv ); - if (m_aapucBS[edgeDir][rasterIdx] && (cuP.firstPU->mhIntraFlag || cuQ.firstPU->mhIntraFlag)) + if (m_aapucBS[edgeDir][rasterIdx] && (cuP.firstPU->ciipFlag || cuQ.firstPU->ciipFlag)) { return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr)); } @@ -546,12 +711,12 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De tmpBs += BsSet(1, COMPONENT_Y); } // U - if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cb) || TU::getCbf(tuP, COMPONENT_Cb))) + if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cb) || TU::getCbf(tuP, COMPONENT_Cb) || tuQ.jointCbCr || tuP.jointCbCr)) { tmpBs += BsSet(1, COMPONENT_Cb); } // V - if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cr) || TU::getCbf(tuP, COMPONENT_Cr))) + if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cr) || TU::getCbf(tuP, COMPONENT_Cr) || tuQ.jointCbCr || tuP.jointCbCr)) { tmpBs += BsSet(1, COMPONENT_Cr); } @@ -559,7 +724,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De { return tmpBs; } - if 
((cuP.firstPU->mhIntraFlag || cuQ.firstPU->mhIntraFlag)) + if ((cuP.firstPU->ciipFlag || cuQ.firstPU->ciipFlag)) { return 1; } @@ -570,6 +735,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De } // and now the pred + if ( m_aapucBS[edgeDir][rasterIdx] != 0 && m_aapucBS[edgeDir][rasterIdx] != 3 ) return tmpBs; const Position& lumaPosQ = Position{ localPos.x, localPos.y }; const Position lumaPosP = ( edgeDir == EDGE_VER ) ? lumaPosQ.offset( -1, 0 ) : lumaPosQ.offset( 0, -1 ); const MotionInfo& miQ = cuQ.cs->getMotionInfo( lumaPosQ ); @@ -589,7 +755,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De if( 0 <= miQ.refIdx[0] ) { mvQ0 = miQ.mv[0]; } if( 0 <= miQ.refIdx[1] ) { mvQ1 = miQ.mv[1]; } - int nThreshold = 1 << MV_FRACTIONAL_BITS_INTERNAL; + int nThreshold = (1 << MV_FRACTIONAL_BITS_INTERNAL) >> 1; unsigned uiBs = 0; //th can be optimized @@ -641,7 +807,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De Mv mvP0 = miP.mv[0]; Mv mvQ0 = miQ.mv[0]; - int nThreshold = 1 << MV_FRACTIONAL_BITS_INTERNAL; + int nThreshold = (1 << MV_FRACTIONAL_BITS_INTERNAL) >> 1; return ( ( abs( mvQ0.getHor() - mvP0.getHor() ) >= nThreshold ) || ( abs( mvQ0.getVer() - mvP0.getVer() ) >= nThreshold ) ) ? 
(tmpBs + 1) : tmpBs; } @@ -675,7 +841,7 @@ void LoopFilter::deriveLADFShift( const Pel* src, const int stride, int& shift, } #endif -void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge, const int initialMaxFilterLengthP, const int initialMaxFilterLengthQ) +void LoopFilter::xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge ) { const CompArea& lumaArea = cu.block(COMPONENT_Y); const PreCalcValues& pcv = *cu.cs->pcv; @@ -687,7 +853,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge const PPS &pps = *(cu.cs->pps); const SPS &sps = *(cu.cs->sps); const Slice &slice = *(cu.slice); - const bool ppsTransquantBypassEnabledFlag = pps.getTransquantBypassEnabledFlag(); + const bool spsPaletteEnabledFlag = sps.getPLTMode(); const int bitDepthLuma = sps.getBitDepth(CHANNEL_TYPE_LUMA); const ClpRng& clpRng( cu.cs->slice->clpRng(COMPONENT_Y) ); @@ -697,7 +863,6 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge unsigned uiBsAbsIdx = 0, uiBs = 0; int iOffset, iSrcStep; - bool bPCMFilter = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? 
true : false; bool bPartPNoFilter = false; bool bPartQNoFilter = false; int betaOffsetDiv2 = slice.getDeblockingFilterBetaOffsetDiv2(); @@ -733,6 +898,15 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge pos.x += xoffset; pos.y += yoffset; + // Deblock luma boundaries on 4x4 grid only + if (edgeDir == EDGE_HOR && (pos.y % 4) != 0) + { + continue; + } + if (edgeDir == EDGE_VER && (pos.x % 4) != 0) + { + continue; + } uiBsAbsIdx = getRasterIdx( pos, pcv ); uiBs = BsGet(m_aapucBS[edgeDir][uiBsAbsIdx], COMPONENT_Y); @@ -743,19 +917,19 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge // Derive neighboring PU index if (edgeDir == EDGE_VER) { -#if HEVC_TILES_WPP - CHECK( !isAvailableLeft( cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ), "Neighbour not available" ); -#else - CHECK( !isAvailable( cu, cuP, !slice.getLFCrossSliceBoundaryFlag() ), "Neighbour not available" ); -#endif + if (!isAvailableLeft(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag())) + { + m_aapucBS[edgeDir][uiBsAbsIdx] = uiBs = 0; + continue; + } } else // (iDir == EDGE_HOR) { -#if HEVC_TILES_WPP - CHECK( !isAvailableAbove( cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ), "Neighbour not available" ); -#else - CHECK( !isAvailable( cu, cuP, !slice.getLFCrossSliceBoundaryFlag() ), "Neighbour not available" ); -#endif + if (!isAvailableAbove(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag())) + { + m_aapucBS[edgeDir][uiBsAbsIdx] = uiBs = 0; + continue; + } } iQP = (cuP.qp + cuQ.qp + 1) >> 1; @@ -771,23 +945,15 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge bool sidePisLarge = false; bool sideQisLarge = false; - int maxFilterLengthP = initialMaxFilterLengthP; - int maxFilterLengthQ = initialMaxFilterLengthQ; + int 
maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Y][pos.x-m_ctuXLumaSamples][pos.y-m_ctuYLumaSamples]; + int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Y][pos.x-m_ctuXLumaSamples][pos.y-m_ctuYLumaSamples]; if (maxFilterLengthP > 3) { - sidePisLarge = (edgeDir == EDGE_VER && cuP.block(COMPONENT_Y).width >= 32) - || (edgeDir == EDGE_HOR && cuP.block(COMPONENT_Y).height >= 32); - - if (sidePisLarge && maxFilterLengthP > 5) + sidePisLarge = true; + if ( maxFilterLengthP > 5 ) { // restrict filter length if sub-blocks are used (e.g affine or ATMVP) - bool ciipSubBlock = false; - if (cuP.firstPU->mhIntraFlag) - { - const uint32_t dirMode = PU::getFinalIntraMode(*(cuP.firstPU), cuP.chType); - ciipSubBlock = edgeDir == EDGE_HOR ? dirMode == VER_IDX : dirMode == HOR_IDX; - } - if (cuP.affine || ciipSubBlock) + if (cuP.affine) { maxFilterLengthP = std::min(maxFilterLengthP, 5); } @@ -795,8 +961,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge } if (maxFilterLengthQ > 3) { - sideQisLarge = (edgeDir == EDGE_VER && cuQ.block(COMPONENT_Y).width >= 32) - || (edgeDir == EDGE_HOR && cuQ.block(COMPONENT_Y).height >= 32); + sideQisLarge = true; } if (edgeDir == EDGE_HOR && pos.y % slice.getSPS()->getCTUSize() == 0) @@ -806,7 +971,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge const int iIndexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(iQP + DEFAULT_INTRA_TC_OFFSET*(uiBs - 1) + (tcOffsetDiv2 << 1))); const int iIndexB = Clip3(0, MAX_QP, iQP + (betaOffsetDiv2 << 1)); - const int iTc = sm_tcTable [iIndexTC] * iBitdepthScale; + const int iTc = bitDepthLuma < 10 ? 
((sm_tcTable[iIndexTC] + 2) >> (10 - bitDepthLuma)) : ((sm_tcTable[iIndexTC]) << (bitDepthLuma - 10)); const int iBeta = sm_betaTable[iIndexB ] * iBitdepthScale; const int iSideThreshold = ( iBeta + ( iBeta >> 1 ) ) >> 3; const int iThrCut = iTc * 10; @@ -846,17 +1011,11 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge int dL = d0L + d3L; bPartPNoFilter = bPartQNoFilter = false; - if (bPCMFilter) - { - // Check if each of PUs is I_PCM with LF disabling - bPartPNoFilter = cuP.ipcm; - bPartQNoFilter = cuQ.ipcm; - } - if (ppsTransquantBypassEnabledFlag) + if (spsPaletteEnabledFlag) { - // check if each of PUs is lossless coded - bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass; - bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass; + // check if each of PUs is palette coded + bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP); + bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ); } if (dL < iBeta) @@ -891,23 +1050,22 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge const int d = d0 + d3; bPartPNoFilter = bPartQNoFilter = false; - if( bPCMFilter ) - { - // Check if each of PUs is I_PCM with LF disabling - bPartPNoFilter = cuP.ipcm; - bPartQNoFilter = cuQ.ipcm; - } - if( ppsTransquantBypassEnabledFlag ) + if( spsPaletteEnabledFlag) { - // check if each of PUs is lossless coded - bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass; - bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass; + // check if each of PUs is palette coded + bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP); + bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ); } if( d < iBeta ) { - const bool bFilterP = (dp < iSideThreshold); - const bool bFilterQ = (dq < iSideThreshold); + bool bFilterP = false; + bool bFilterQ = false; + if (maxFilterLengthP > 1 && maxFilterLengthQ > 1) + { + bFilterP = (dp < iSideThreshold); + bFilterQ = (dq < iSideThreshold); + } bool sw = false; if (maxFilterLengthP > 2 && maxFilterLengthQ > 
2) { @@ -948,7 +1106,6 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed int iOffset, iSrcStep; unsigned uiLoopLength; - bool bPCMFilter = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? true : false; bool bPartPNoFilter = false; bool bPartQNoFilter = false; const int tcOffsetDiv2 = slice.getDeblockingFilterTcOffsetDiv2(); @@ -1018,49 +1175,38 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed if (bS[0] > 0 || bS[1] > 0) { const CodingUnit& cuQ = cu; - const CodingUnit& cuP = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, cu.chType, pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), cu.chType ); + CodingUnit& cuP1 = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, cu.chType, pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), cu.chType ); + CodingUnit& cuP = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, (cuP1.isSepTree() ? CHANNEL_TYPE_CHROMA : cu.chType), pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), (cuP1.isSepTree() ? 
CHANNEL_TYPE_CHROMA : cu.chType)); if (edgeDir == EDGE_VER) { -#if HEVC_TILES_WPP - CHECK(!isAvailableLeft(cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available"); -#else - CHECK(!isAvailable(cu, cuP, !slice.getLFCrossSliceBoundaryFlag()), "Neighbour not available"); -#endif + CHECK(!isAvailableLeft(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available"); } else // (iDir == EDGE_HOR) { -#if HEVC_TILES_WPP - CHECK(!isAvailableAbove(cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available"); -#else - CHECK(!isAvailable(cu, cuP, !slice.getLFCrossSliceBoundaryFlag()), "Neighbour not available"); -#endif + CHECK(!isAvailableAbove(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available"); } bPartPNoFilter = bPartQNoFilter = false; - if (bPCMFilter) + if ( sps.getPLTMode()) { - // Check if each of PUs is I_PCM with LF disabling - bPartPNoFilter = cuP.ipcm; - bPartQNoFilter = cuQ.ipcm; + // check if each of PUs is palette coded + bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP); + bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ); } - if( pps.getTransquantBypassEnabledFlag() ) + + const int maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer]; + const int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer]; + bool largeBoundary = false; + bool isChromaHorCTBBoundary = false; + if ( maxFilterLengthP >= 3 && maxFilterLengthQ >= 3 ) { - // check if each of PUs is lossless coded - bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass; - bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass; + largeBoundary = true; } - const unsigned cuPWidth = 
cuP.block(COMPONENT_Cb).width; - const unsigned cuPHeight = cuP.block(COMPONENT_Cb).height; - const unsigned cuQWidth = cuQ.block(COMPONENT_Cb).width; - const unsigned cuQHeight = cuQ.block(COMPONENT_Cb).height; - - bool largeBoundary = ((edgeDir == EDGE_VER && cuPWidth >= 8 && cuQWidth >= 8) || (edgeDir == EDGE_HOR && cuPHeight >= 8 && cuQHeight >= 8)); - if (edgeDir == EDGE_HOR && pos.y % cuP.slice->getSPS()->getCTUSize() == 0) { - largeBoundary = false; + isChromaHorCTBBoundary = true; } for( int chromaIdx = 0; chromaIdx < 2; chromaIdx++ ) @@ -1068,53 +1214,54 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed if ((bS[chromaIdx] == 2) || (largeBoundary && (bS[chromaIdx] == 1))) { const ClpRng& clpRng( cu.cs->slice->clpRng( ComponentID( chromaIdx + 1 )) ); - const int chromaQPOffset = pps.getQpOffset( ComponentID( chromaIdx + 1 ) ); Pel* piTmpSrcChroma = (chromaIdx == 0) ? piTmpSrcCb : piTmpSrcCr; - int iQP = ( ( cuP.qp + cuQ.qp + 1 ) >> 1 ) + chromaQPOffset; - if (iQP >= chromaQPMappingTableSize) - { - if( sps.getChromaFormatIdc() == CHROMA_420 ) - { - iQP -= 6; - } - else if( iQP > MAX_QP ) - { - iQP = MAX_QP; - } - } - else if( iQP >= 0 ) - { - iQP = getScaledChromaQP(iQP, sps.getChromaFormatIdc()); - } + int shiftHorP = cuP.Y().valid() ? 0 : ::getComponentScaleX(COMPONENT_Cb, cuP.firstPU->chromaFormat); + int shiftVerP = cuP.Y().valid() ? 0 : ::getComponentScaleY(COMPONENT_Cb, cuP.firstPU->chromaFormat); + int shiftHorQ = cuQ.Y().valid() ? 0 : ::getComponentScaleX(COMPONENT_Cb, cuQ.firstPU->chromaFormat); + int shiftVerQ = cuQ.Y().valid() ? 0 : ::getComponentScaleY(COMPONENT_Cb, cuQ.firstPU->chromaFormat); + const Position& posQ = Position{ pos.x >> shiftHorQ, pos.y >> shiftVerQ }; + const Position& posP1 = Position{ pos.x >> shiftHorP, pos.y >> shiftVerP }; + const Position posP = (edgeDir == EDGE_VER) ? 
posP1.offset(-1, 0) : posP1.offset(0, -1); - const int iIndexTC = Clip3<int>(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, iQP + DEFAULT_INTRA_TC_OFFSET * (bS[chromaIdx] - 1) + (tcOffsetDiv2 << 1)); - const int iTc = sm_tcTable[iIndexTC] * iBitdepthScale; + const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType); + const TransformUnit& tuP = *cuP.cs->getTU(posP, cuP.chType); + + const QpParam cQP(tuP, ComponentID(chromaIdx + 1)); + const QpParam cQQ(tuQ, ComponentID(chromaIdx + 1)); + const int qpBdOffset = tuP.cs->sps->getQpBDOffset(toChannelType(ComponentID(chromaIdx + 1))); + int baseQp_P = cQP.Qp(0) - qpBdOffset; + int baseQp_Q = cQQ.Qp(0) - qpBdOffset; + int iQP = ((baseQp_Q + baseQp_P + 1) >> 1); + + const int iIndexTC = Clip3<int>(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, iQP + DEFAULT_INTRA_TC_OFFSET * (bS[chromaIdx] - 1) + (tcOffsetDiv2 << 1)); + const int iTc = sps.getBitDepth(CHANNEL_TYPE_CHROMA) < 10 ? ((sm_tcTable[iIndexTC] + 2) >> (10 - sps.getBitDepth(CHANNEL_TYPE_CHROMA))) : ((sm_tcTable[iIndexTC]) << (sps.getBitDepth(CHANNEL_TYPE_CHROMA) - 10)); bool useLongFilter = false; if (largeBoundary) { const int indexB = Clip3<int>(0, MAX_QP, iQP + (betaOffsetDiv2 << 1)); const int beta = sm_betaTable[indexB] * iBitdepthScale; - const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset); + const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, isChromaHorCTBBoundary); const int dq0 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset); - const int dp1 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset); - const int dq1 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset); + const int subSamplingShift = ( edgeDir == EDGE_VER ) ? m_shiftVer : m_shiftHor; + const int dp3 = (subSamplingShift == 1) ? 
xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, isChromaHorCTBBoundary) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset, isChromaHorCTBBoundary); + const int dq3 = ( subSamplingShift == 1 ) ? xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset); const int d0 = dp0 + dq0; - const int d1 = dp1 + dq1; - const int d = d0 + d1; + const int d3 = dp3 + dq3; + const int d = d0 + d3; if (d < beta) { useLongFilter = true; - const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc) - && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, 2 * d1, beta, iTc); + const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary) + && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 
1 : 3)), iOffset, 2 * d3, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary); for (unsigned step = 0; step < uiLoopLength; step++) { - xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary); + xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary); } } } @@ -1122,7 +1269,7 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed { for (unsigned step = 0; step < uiLoopLength; step++) { - xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary); + xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary); } } } @@ -1342,7 +1489,7 @@ inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int \param bPartQNoFilter indicator to disable filtering on partQ \param bitDepthChroma chroma bit depth */ -inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const +inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const { int delta; @@ -1357,12 +1504,22 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i if (sw) { + if (isChromaHorCTBBoundary) + { + piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((3 * m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0 + piSrc[0] = Clip3(m4 - tc, m4 + tc, ((2 * m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 + piSrc[iOffset * 1] = 
Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 + piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + } + else + { piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3)); // p2 piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3)); // p1 piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0 - piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 - piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 - piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0 + piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3)); // q1 + piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3)); // q2 + } } else { @@ -1401,13 +1558,18 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i \param tc tc value \param piSrc pointer to picture data */ -inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ ) const +inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const { const Pel m4 = piSrc[ 0 ]; const Pel m3 = piSrc[-iOffset ]; const Pel m7 = piSrc[ iOffset * 3]; const Pel m0 = piSrc[-iOffset * 4]; - int sp3 = abs(m0 - m3); + const Pel m2 = piSrc[-iOffset * 2]; + int sp3 = abs(m0 - m3); + if (isChromaHorCTBBoundary) + { + sp3 = abs(m2 - m3); + 
} int sq3 = abs(m7 - m4); const int d_strong = sp3 + sq3; @@ -1415,6 +1577,41 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons { Pel mP4; Pel m11; +#if JVET_Q0054 + if (sidePisLarge) + { + if (maxFilterLengthP == 7) + { + const Pel mP5 = piSrc[-iOffset * 5]; + const Pel mP6 = piSrc[-iOffset * 6]; + const Pel mP7 = piSrc[-iOffset * 7];; + mP4 = piSrc[-iOffset * 8]; + sp3 = sp3 + abs(mP5 - mP6 - mP7 + mP4); + } + else + { + mP4 = piSrc[-iOffset * 6]; + } + sp3 = (sp3 + abs(m0 - mP4) + 1) >> 1; + } + if (sideQisLarge) + { + if (maxFilterLengthQ == 7) + { + const Pel m8 = piSrc[iOffset * 4]; + const Pel m9 = piSrc[iOffset * 5]; + const Pel m10 = piSrc[iOffset * 6];; + m11 = piSrc[iOffset * 7]; + sq3 = sq3 + abs(m8 - m9 - m10 + m11); + } + else + { + m11 = piSrc[iOffset * 5]; + } + sq3 = (sq3 + abs(m11 - m7) + 1) >> 1; + } + return ((sp3 + sq3) < (beta*3 >> 5)) && (d < (beta >> 4)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); +#else if (maxFilterLengthP == 5) { mP4 = piSrc[-iOffset * 6]; @@ -1441,14 +1638,22 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons sq3 = (sq3 + abs(m11 - m7) + 1) >> 1; } return ((sp3 + sq3) < (beta*3 >> 5)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); +#endif } else return ( ( d_strong < ( beta >> 3 ) ) && ( d < ( beta >> 2 ) ) && ( abs( m3 - m4 ) < ( ( tc * 5 + 1 ) >> 1 ) ) ); } -inline int LoopFilter::xCalcDP( Pel* piSrc, const int iOffset ) const +inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const { - return abs( piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset] ); + if (isChromaHorCTBBoundary) + { + return abs(piSrc[-iOffset * 2] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); + } + else + { + return abs(piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset]); + } } inline int LoopFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const diff --git a/source/Lib/CommonLib/LoopFilter.h 
b/source/Lib/CommonLib/LoopFilter.h index 6ff62b0ad50a49c1558287c041ff436440e2fcc4..b3c916544ef91e1089e8c7a614a0c4822eb2d74c 100644 --- a/source/Lib/CommonLib/LoopFilter.h +++ b/source/Lib/CommonLib/LoopFilter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,6 +58,11 @@ private: static_vector<char, MAX_NUM_PARTS_IN_CTU> m_aapucBS [NUM_EDGE_DIR]; ///< Bs for [Ver/Hor][Y/U/V][Blk_Idx] static_vector<bool, MAX_NUM_PARTS_IN_CTU> m_aapbEdgeFilter[NUM_EDGE_DIR]; LFCUParam m_stLFCUParam; ///< status structure + int m_ctuXLumaSamples, m_ctuYLumaSamples; // location of left-edge and top-edge of CTU + int m_shiftHor, m_shiftVer; // shift values to convert location from luma sample units to chroma sample units + uint8_t m_maxFilterLengthP[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE]; // maxFilterLengthP for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU] + uint8_t m_maxFilterLengthQ[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE]; // maxFilterLengthQ for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU] + bool m_transformEdge[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE]; // transform edge flag for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU] PelStorage m_encPicYuvBuffer; bool m_enc; private: @@ -74,24 +79,29 @@ private: const Area& area, const bool bValue, const bool EdgeIdx = false ); - void xEdgeFilterLuma ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge, const int initialMaxFilterLengthP, const int initialMaxFilterLengthQ ); + void xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int 
iEdge ); void xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET void deriveLADFShift( const Pel* src, const int stride, int& shift, const DeblockEdgeDir edgeDir, const SPS sps ); #endif + void xSetMaxFilterLengthPQFromTransformSizes( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const TransformUnit& currTU ); + void xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu ); inline void xBilinearFilter ( Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc ) const; inline void xFilteringPandQ ( Pel* src, int offset, int numberPSide, int numberQSide, int tc ) const; inline void xPelFilterLuma ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const; - inline void xPelFilterChroma ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const; - inline bool xUseStrongFiltering ( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;//move the computation outside the function + inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const; + inline bool xUseStrongFiltering(Pel* 
piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7, bool isChromaHorCTBBoundary = false) const;//move the computation outside the function inline unsigned BsSet(unsigned val, const ComponentID compIdx) const; inline unsigned BsGet(unsigned val, const ComponentID compIdx) const; - inline int xCalcDP ( Pel* piSrc, const int iOffset ) const; + inline bool isCrossedByVirtualBoundaries ( const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader ); + inline void xDeriveEdgefilterParam ( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter ); + + inline int xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary = false) const; inline int xCalcDQ ( Pel* piSrc, const int iOffset ) const; - static const uint8_t sm_tcTable[MAX_QP + 3]; + static const uint16_t sm_tcTable[MAX_QP + 3]; static const uint8_t sm_betaTable[MAX_QP + 1]; public: diff --git a/source/Lib/CommonLib/MCTS.cpp b/source/Lib/CommonLib/MCTS.cpp index dd87fb11bdc1c50df48be97763be4e8ba6522ebb..6af615f59d214e42dd0af09f52e0bafc94eb3df1 100644 --- a/source/Lib/CommonLib/MCTS.cpp +++ b/source/Lib/CommonLib/MCTS.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -90,22 +90,20 @@ void MCTSHelper::clipMvToArea( Mv& rcMv, const Area& block, const Area& clipArea Area MCTSHelper::getTileArea( const CodingStructure* cs, const int ctuAddr ) { - const TileMap* tileMap = cs->picture->tileMap; - const int tileIdx = tileMap->getTileIdxMap( ctuAddr ); - const Tile& currentTile = tileMap->tiles[tileIdx]; - - const int frameWidthInCtus = cs->pcv->widthInCtus; - const int firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - - const int tileXPosInCtus = firstCtuRsAddrOfTile % frameWidthInCtus; - const int tileYPosInCtus = firstCtuRsAddrOfTile / frameWidthInCtus; - const int tileWidthtInCtus = currentTile.getTileWidthInCtus(); - const int tileHeightInCtus = currentTile.getTileHeightInCtus(); - + const PPS *pps = cs->pps; const int maxCUWidth = cs->pcv->maxCUWidth; const int maxCUHeight = cs->pcv->maxCUHeight; - const int tileLeftTopPelPosX = maxCUWidth * tileXPosInCtus; + const uint32_t tileIdx = pps->getTileIdx( (uint32_t)ctuAddr ); + const uint32_t tileX = tileIdx % pps->getNumTileColumns(); + const uint32_t tileY = tileIdx / pps->getNumTileColumns(); + + const int tileWidthtInCtus = pps->getTileColumnWidth( tileX ); + const int tileHeightInCtus = pps->getTileRowHeight ( tileY ); + const int tileXPosInCtus = pps->getTileColumnBd( tileX ); + const int tileYPosInCtus = pps->getTileRowBd( tileY ); + + const int tileLeftTopPelPosX = maxCUWidth * tileXPosInCtus; const int tileLeftTopPelPosY = maxCUHeight * tileYPosInCtus; const int tileRightBottomPelPosX = std::min<int>( ( ( tileWidthtInCtus + tileXPosInCtus ) * maxCUWidth ), (int)cs->picture->lwidth() ) - 1; const int tileRightBottomPelPosY = std::min<int>( ( ( tileHeightInCtus + tileYPosInCtus ) * maxCUHeight ), (int)cs->picture->lheight() ) - 1; diff --git a/source/Lib/CommonLib/MCTS.h b/source/Lib/CommonLib/MCTS.h index ed651655c3c02c7200bc4a4aa0c0594bd3edf457..3d9d3bf52e4207673645aabed5160ecc2fc41c5a 100644 
--- a/source/Lib/CommonLib/MCTS.h +++ b/source/Lib/CommonLib/MCTS.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.cpp b/source/Lib/CommonLib/MatrixIntraPrediction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3376aa25da490108aa96b72541b597bebead49c --- /dev/null +++ b/source/Lib/CommonLib/MatrixIntraPrediction.cpp @@ -0,0 +1,340 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** \file MatrixIntraPrediction.cpp +\brief matrix-based intra prediction class +*/ + +#include "MatrixIntraPrediction.h" +#include "dtrace_next.h" + +#include "UnitTools.h" +#include "MipData.h" + + +MatrixIntraPrediction::MatrixIntraPrediction(): + m_reducedBoundary (MIP_MAX_INPUT_SIZE), + m_reducedBoundaryTransposed(MIP_MAX_INPUT_SIZE), + m_inputOffset ( 0 ), + m_inputOffsetTransp( 0 ), + m_refSamplesTop (MIP_MAX_WIDTH), + m_refSamplesLeft(MIP_MAX_HEIGHT), + m_blockSize( 0, 0 ), + m_sizeId( 0 ), + m_reducedBdrySize( 0 ), + m_reducedPredSize( 0 ), + m_upsmpFactorHor( 0 ), + m_upsmpFactorVer( 0 ) +{ +} + + +void MatrixIntraPrediction::prepareInputForPred(const CPelBuf &pSrc, const Area& block, const int bitDepth) +{ + // Step 1: Save block size and calculate dependent values + initPredBlockParams(block); + + // Step 2: Get the input data (left and top reference samples) + m_refSamplesTop.resize(block.width); + for (int x = 0; x < block.width; x++) + { + m_refSamplesTop[x] = pSrc.at(x + 1, 0); + } + + m_refSamplesLeft.resize(block.height); + for (int y = 0; y < block.height; y++) + { + m_refSamplesLeft[y] = pSrc.at(y + 1, 1); + } + + // Step 3: Compute the reduced boundary via Haar-downsampling (input for the prediction) + const int inputSize = 2 * m_reducedBdrySize; + m_reducedBoundary .resize( inputSize ); + m_reducedBoundaryTransposed.resize( inputSize ); + + int* const topReduced = m_reducedBoundary.data(); + 
boundaryDownsampling1D( topReduced, m_refSamplesTop.data(), block.width, m_reducedBdrySize ); + + int* const leftReduced = m_reducedBoundary.data() + m_reducedBdrySize; + boundaryDownsampling1D( leftReduced, m_refSamplesLeft.data(), block.height, m_reducedBdrySize ); + + int* const leftReducedTransposed = m_reducedBoundaryTransposed.data(); + int* const topReducedTransposed = m_reducedBoundaryTransposed.data() + m_reducedBdrySize; + for( int x = 0; x < m_reducedBdrySize; x++ ) + { + topReducedTransposed[x] = topReduced[x]; + } + for( int y = 0; y < m_reducedBdrySize; y++ ) + { + leftReducedTransposed[y] = leftReduced[y]; + } + + // Step 4: Rebase the reduced boundary + + m_inputOffset = m_reducedBoundary[0]; + m_inputOffsetTransp = m_reducedBoundaryTransposed[0]; + + const bool hasFirstCol = (m_sizeId < 2); + m_reducedBoundary [0] = hasFirstCol ? (m_inputOffset - (1 << (bitDepth - 1))) : 0; // first column of matrix not needed for large blocks + m_reducedBoundaryTransposed[0] = hasFirstCol ? (m_inputOffsetTransp - (1 << (bitDepth - 1))) : 0; + for (int i = 1; i < inputSize; i++) + { + m_reducedBoundary [i] -= m_inputOffset; + m_reducedBoundaryTransposed[i] -= m_inputOffsetTransp; + } +} + +void MatrixIntraPrediction::predBlock(int* const result, const int modeIdx, const bool transpose, const int bitDepth) +{ + const bool needUpsampling = ( m_upsmpFactorHor > 1 ) || ( m_upsmpFactorVer > 1 ); + + const uint8_t* matrix; + int shiftMatrix = 0, offsetMatrix = 0; + getMatrixData(matrix, shiftMatrix, offsetMatrix, modeIdx); + + static_vector<int, MIP_MAX_REDUCED_OUTPUT_SAMPLES> bufReducedPred( m_reducedPredSize * m_reducedPredSize ); + int* const reducedPred = needUpsampling ? bufReducedPred.data() : result; + const int* const reducedBoundary = transpose ? 
m_reducedBoundaryTransposed.data() : m_reducedBoundary.data(); + computeReducedPred( reducedPred, reducedBoundary, matrix, shiftMatrix, offsetMatrix, transpose, bitDepth ); + if( needUpsampling ) + { + predictionUpsampling( result, reducedPred ); + } +} + + +void MatrixIntraPrediction::initPredBlockParams(const Size& block) +{ + m_blockSize = block; + // init size index + m_sizeId = getMipSizeId( m_blockSize ); + + // init reduced boundary size + m_reducedBdrySize = (m_sizeId == 0) ? 2 : 4; + + // init reduced prediction size + m_reducedPredSize = ( m_sizeId < 2 ) ? 4 : 8; + + + // init upsampling factors + m_upsmpFactorHor = m_blockSize.width / m_reducedPredSize; + m_upsmpFactorVer = m_blockSize.height / m_reducedPredSize; + + CHECKD( (m_upsmpFactorHor < 1) || ((m_upsmpFactorHor & (m_upsmpFactorHor - 1)) != 0), "Need power of two horizontal upsampling factor." ); + CHECKD( (m_upsmpFactorVer < 1) || ((m_upsmpFactorVer & (m_upsmpFactorVer - 1)) != 0), "Need power of two vertical upsampling factor." 
); +} + + + +void MatrixIntraPrediction::boundaryDownsampling1D(int* reducedDst, const int* const fullSrc, const SizeType srcLen, const SizeType dstLen) +{ + if (dstLen < srcLen) + { + // Create reduced boundary by downsampling + const SizeType downsmpFactor = srcLen / dstLen; + const int log2DownsmpFactor = floorLog2(downsmpFactor); + const int roundingOffset = (1 << (log2DownsmpFactor - 1)); + + SizeType srcIdx = 0; + for( SizeType dstIdx = 0; dstIdx < dstLen; dstIdx++ ) + { + int sum = 0; + for( int k = 0; k < downsmpFactor; k++ ) + { + sum += fullSrc[srcIdx++]; + } + reducedDst[dstIdx] = (sum + roundingOffset) >> log2DownsmpFactor; + } + } + else + { + // Copy boundary if no downsampling is needed + for (SizeType i = 0; i < dstLen; ++i) + { + reducedDst[i] = fullSrc[i]; + } + } +} + + +void MatrixIntraPrediction::predictionUpsampling1D(int* const dst, const int* const src, const int* const bndry, + const SizeType srcSizeUpsmpDim, const SizeType srcSizeOrthDim, + const SizeType srcStep, const SizeType srcStride, + const SizeType dstStep, const SizeType dstStride, + const SizeType bndryStep, + const unsigned int upsmpFactor ) +{ + const int log2UpsmpFactor = floorLog2( upsmpFactor ); + CHECKD( upsmpFactor <= 1, "Upsampling factor must be at least 2." 
); + const int roundingOffset = 1 << (log2UpsmpFactor - 1); + + SizeType idxOrthDim = 0; + const int* srcLine = src; + int* dstLine = dst; + const int* bndryLine = bndry + bndryStep - 1; + while( idxOrthDim < srcSizeOrthDim ) + { + SizeType idxUpsmpDim = 0; + const int* before = bndryLine; + const int* behind = srcLine; + int* currDst = dstLine; + while( idxUpsmpDim < srcSizeUpsmpDim ) + { + SizeType pos = 1; + int scaledBefore = ( *before ) << log2UpsmpFactor; + int scaledBehind = 0; + while( pos <= upsmpFactor ) + { + scaledBefore -= *before; + scaledBehind += *behind; + *currDst = (scaledBefore + scaledBehind + roundingOffset) >> log2UpsmpFactor; + + pos++; + currDst += dstStep; + } + + idxUpsmpDim++; + before = behind; + behind += srcStep; + } + + idxOrthDim++; + srcLine += srcStride; + dstLine += dstStride; + bndryLine += bndryStep; + } +} + + +void MatrixIntraPrediction::predictionUpsampling( int* const dst, const int* const src ) const +{ + const int* verSrc = src; + SizeType verSrcStep = m_blockSize.width; + + if( m_upsmpFactorHor > 1 ) + { + int* const horDst = dst + (m_upsmpFactorVer - 1) * m_blockSize.width; + verSrc = horDst; + verSrcStep *= m_upsmpFactorVer; + + predictionUpsampling1D( horDst, src, m_refSamplesLeft.data(), + m_reducedPredSize, m_reducedPredSize, + 1, m_reducedPredSize, 1, verSrcStep, + m_upsmpFactorVer, m_upsmpFactorHor ); + } + + if( m_upsmpFactorVer > 1 ) + { + predictionUpsampling1D( dst, verSrc, m_refSamplesTop.data(), + m_reducedPredSize, m_blockSize.width, + verSrcStep, 1, m_blockSize.width, 1, + 1, m_upsmpFactorVer ); + } +} + +void MatrixIntraPrediction::getMatrixData(const uint8_t*& matrix, int &shiftMatrix, int &offsetMatrix, const int modeIdx) const +{ + switch( m_sizeId ) + { + case 0: matrix = &mipMatrix4x4 [modeIdx][0][0]; + shiftMatrix = mipShiftMatrix4x4 [modeIdx]; + offsetMatrix = mipOffsetMatrix4x4[modeIdx]; + break; + + case 1: matrix = &mipMatrix8x8 [modeIdx][0][0]; + shiftMatrix = mipShiftMatrix8x8 [modeIdx]; + 
offsetMatrix = mipOffsetMatrix8x8[modeIdx]; + break; + + case 2: matrix = &mipMatrix16x16 [modeIdx][0][0]; + shiftMatrix = mipShiftMatrix16x16 [modeIdx]; + offsetMatrix = mipOffsetMatrix16x16[modeIdx]; + break; + + default: THROW( "Invalid mipSizeId" ); + } +} + +void MatrixIntraPrediction::computeReducedPred( int*const result, const int* const input, + const uint8_t*matrix, const int shiftMatrix, const int offsetMatrix, + const bool transpose, const int bitDepth ) +{ + const int inputSize = 2 * m_reducedBdrySize; + + // use local buffer for transposed result + static_vector<int, MIP_MAX_REDUCED_OUTPUT_SAMPLES> resBufTransposed( m_reducedPredSize * m_reducedPredSize ); + int*const resPtr = (transpose) ? resBufTransposed.data() : result; + + int sum = 0; + for( int i = 0; i < inputSize; i++ ) { sum += input[i]; } + const int offset = (1 << (shiftMatrix - 1)) - offsetMatrix * sum; + CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" ); + + const uint8_t *weight = matrix; + const int inputOffset = transpose ? m_inputOffsetTransp : m_inputOffset; + + const bool redSize = (m_sizeId == 2); + int posRes = 0; + for( int y = 0; y < m_reducedPredSize; y++ ) + { + for( int x = 0; x < m_reducedPredSize; x++ ) + { + if( redSize ) weight -= 1; + int tmp0 = redSize ? 
0 : (input[0] * weight[0]); + int tmp1 = input[1] * weight[1]; + int tmp2 = input[2] * weight[2]; + int tmp3 = input[3] * weight[3]; + for (int i = 4; i < inputSize; i += 4) + { + tmp0 += input[i] * weight[i]; + tmp1 += input[i + 1] * weight[i + 1]; + tmp2 += input[i + 2] * weight[i + 2]; + tmp3 += input[i + 3] * weight[i + 3]; + } + resPtr[posRes++] = ClipBD<int>( ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> shiftMatrix) + inputOffset, bitDepth ); + + weight += inputSize; + } + } + + if( transpose ) + { + for( int y = 0; y < m_reducedPredSize; y++ ) + { + for( int x = 0; x < m_reducedPredSize; x++ ) + { + result[ y * m_reducedPredSize + x ] = resPtr[ x * m_reducedPredSize + y ]; + } + } + } +} diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.h b/source/Lib/CommonLib/MatrixIntraPrediction.h new file mode 100644 index 0000000000000000000000000000000000000000..bf90ae11d4ea414386fe60c855787d8d8f11ae86 --- /dev/null +++ b/source/Lib/CommonLib/MatrixIntraPrediction.h @@ -0,0 +1,91 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. 
+* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** \file MatrixIntraPrediction.h +\brief matrix-based intra prediction class (header) +*/ + +#ifndef __MATRIXINTRAPPREDICTION__ +#define __MATRIXINTRAPPREDICTION__ + + +#include "Unit.h" + +static const int MIP_MAX_INPUT_SIZE = 8; +static const int MIP_MAX_REDUCED_OUTPUT_SAMPLES = 64; + + +class MatrixIntraPrediction +{ +public: + MatrixIntraPrediction(); + + void prepareInputForPred(const CPelBuf &pSrc, const Area& block, const int bitDepth); + void predBlock(int* const result, const int modeIdx, const bool transpose, const int bitDepth); + + private: + static_vector<int, MIP_MAX_INPUT_SIZE> m_reducedBoundary; // downsampled boundary of a block + static_vector<int, MIP_MAX_INPUT_SIZE> m_reducedBoundaryTransposed; // downsampled, transposed boundary of a block + int m_inputOffset; + int m_inputOffsetTransp; + static_vector<int, MIP_MAX_WIDTH> m_refSamplesTop; // top reference samples for upsampling + static_vector<int, MIP_MAX_HEIGHT> m_refSamplesLeft; // left reference samples for 
upsampling + + Size m_blockSize; + int m_sizeId; + int m_reducedBdrySize; + int m_reducedPredSize; + unsigned int m_upsmpFactorHor; + unsigned int m_upsmpFactorVer; + + void initPredBlockParams(const Size& block); + + static void boundaryDownsampling1D(int* reducedDst, const int* const fullSrc, const SizeType srcLen, const SizeType dstLen); + + void predictionUpsampling( int* const dst, const int* const src ) const; + static void predictionUpsampling1D( int* const dst, const int* const src, const int* const bndry, + const SizeType srcSizeUpsmpDim, const SizeType srcSizeOrthDim, + const SizeType srcStep, const SizeType srcStride, + const SizeType dstStep, const SizeType dstStride, + const SizeType bndryStep, + const unsigned int upsmpFactor ); + + void getMatrixData(const uint8_t*& matrix, int &shiftMatrix, int &offsetMatrix, const int modeIdx) const; + + + void computeReducedPred( int*const result, const int* const input, + const uint8_t*matrix, const int shiftMatrix, const int offsetMatrix, + const bool transpose, const int bitDepth ); + }; + +#endif //__MATRIXINTRAPPREDICTION__ diff --git a/source/Lib/CommonLib/MipData.h b/source/Lib/CommonLib/MipData.h new file mode 100644 index 0000000000000000000000000000000000000000..487565eb11e846bbc5f4e9e279f855e54f786d11 --- /dev/null +++ b/source/Lib/CommonLib/MipData.h @@ -0,0 +1,894 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. 
+* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/** \file MipData.h +\brief weight and bias data for matrix-based intra prediction (MIP) +*/ + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix4x4[16][16][4]) = +{ + { + { 5, 16, 51, 2}, + { 5, 22, 18, 36}, + { 5, 15, 5, 55}, + { 5, 10, 6, 59}, + { 4, 6, 12, 59}, + { 5, 3, 4, 66}, + { 7, 0, 5, 67}, + { 8, 1, 7, 65}, + { 6, 2, 3, 67}, + { 7, 1, 5, 66}, + { 9, 1, 6, 66}, + { 10, 2, 6, 66}, + { 7, 3, 5, 64}, + { 9, 2, 6, 64}, + { 10, 2, 6, 65}, + { 10, 3, 7, 64} + }, + { + { 28, 27, 58, 26}, + { 28, 27, 52, 27}, + { 29, 39, 43, 28}, + { 28, 70, 38, 27}, + { 28, 28, 61, 28}, + { 28, 27, 62, 26}, + { 29, 30, 60, 25}, + { 25, 61, 49, 23}, + { 29, 28, 35, 54}, + { 29, 28, 42, 47}, + { 28, 26, 50, 36}, + { 16, 42, 46, 19}, + { 28, 28, 23, 64}, + { 29, 28, 24, 64}, + { 25, 24, 23, 58}, + { 0, 19, 21, 31} + }, + { + { 28, 27, 39, 26}, + { 29, 32, 29, 27}, + { 29, 60, 31, 27}, + { 27, 68, 31, 26}, + { 28, 27, 51, 27}, + { 28, 39, 39, 26}, + { 26, 64, 33, 28}, + { 21, 64, 27, 35}, + { 27, 28, 38, 50}, + { 19, 44, 31, 51}, + { 10, 57, 22, 54}, + { 7, 53, 16, 58}, + { 19, 31, 12, 72}, + { 6, 47, 14, 64}, + { 0, 50, 15, 60}, + { 2, 48, 15, 60} + }, + { + { 42, 40, 64, 45}, + { 43, 41, 44, 52}, + { 43, 67, 34, 49}, + { 41, 76, 38, 45}, + { 42, 41, 50, 67}, + { 41, 39, 42, 71}, + { 38, 52, 33, 63}, + { 31, 70, 31, 47}, + { 40, 41, 39, 76}, + { 31, 36, 38, 74}, + { 17, 30, 35, 69}, + { 9, 47, 30, 50}, + { 32, 37, 37, 75}, + { 15, 29, 36, 68}, + { 3, 22, 37, 62}, + { 0, 26, 38, 52} + }, + { + { 57, 53, 73, 55}, + { 58, 75, 61, 55}, + { 55, 91, 53, 63}, + { 45, 87, 43, 74}, + { 58, 60, 80, 63}, + { 51, 71, 62, 73}, + { 30, 65, 48, 77}, + { 14, 50, 47, 73}, + { 50, 56, 53, 89}, + { 20, 43, 50, 77}, + { 2, 32, 53, 67}, + { 2, 30, 54, 65}, + { 26, 41, 47, 82}, + { 2, 28, 53, 65}, + { 0, 27, 55, 63}, + { 3, 29, 55, 64} + }, + { + { 23, 19, 88, 8}, + { 23, 44, 61, 25}, + { 22, 64, 24, 52}, + { 21, 56, 0, 71}, + { 21, 25, 53, 61}, + { 20, 26, 31, 80}, + { 20, 22, 21, 88}, + 
{ 19, 20, 17, 88}, + { 20, 22, 19, 89}, + { 19, 21, 19, 89}, + { 18, 20, 20, 88}, + { 17, 19, 20, 86}, + { 19, 20, 20, 87}, + { 18, 21, 21, 86}, + { 16, 21, 21, 85}, + { 13, 19, 21, 83} + }, + { + { 9, 0, 11, 6}, + { 9, 19, 11, 8}, + { 9, 70, 11, 8}, + { 9, 76, 12, 8}, + { 9, 0, 12, 6}, + { 9, 19, 11, 7}, + { 9, 70, 11, 9}, + { 9, 76, 12, 9}, + { 9, 0, 12, 6}, + { 9, 20, 12, 7}, + { 9, 70, 12, 9}, + { 9, 75, 12, 9}, + { 9, 1, 11, 8}, + { 9, 20, 12, 8}, + { 9, 70, 12, 8}, + { 9, 75, 11, 9} + }, + { + { 6, 3, 61, 7}, + { 7, 0, 34, 7}, + { 7, 13, 5, 6}, + { 6, 56, 1, 4}, + { 6, 7, 74, 5}, + { 6, 4, 70, 6}, + { 6, 0, 51, 6}, + { 6, 8, 19, 5}, + { 7, 7, 55, 23}, + { 7, 7, 71, 8}, + { 6, 5, 74, 5}, + { 6, 3, 59, 7}, + { 5, 5, 3, 74}, + { 6, 6, 36, 41}, + { 7, 7, 62, 15}, + { 6, 7, 66, 10} + }, + { + { 35, 26, 78, 28}, + { 34, 39, 35, 33}, + { 34, 96, 32, 35}, + { 35, 102, 35, 35}, + { 31, 30, 107, 25}, + { 29, 34, 66, 24}, + { 31, 93, 32, 31}, + { 35, 101, 34, 35}, + { 31, 31, 64, 72}, + { 19, 28, 86, 30}, + { 18, 85, 47, 20}, + { 31, 99, 34, 33}, + { 32, 32, 27, 106}, + { 12, 26, 52, 71}, + { 0, 72, 60, 19}, + { 20, 93, 37, 26} + }, + { + { 10, 6, 60, 0}, + { 7, 14, 39, 4}, + { 4, 65, 21, 11}, + { 1, 80, 12, 16}, + { 10, 11, 52, 46}, + { 6, 9, 36, 57}, + { 2, 18, 20, 65}, + { 0, 23, 11, 65}, + { 13, 13, 13, 80}, + { 12, 13, 12, 80}, + { 12, 12, 13, 80}, + { 11, 12, 14, 79}, + { 15, 14, 12, 79}, + { 15, 14, 13, 79}, + { 16, 16, 15, 78}, + { 16, 16, 17, 76} + }, + { + { 50, 48, 85, 42}, + { 47, 44, 59, 44}, + { 43, 52, 51, 43}, + { 14, 107, 51, 29}, + { 51, 49, 92, 77}, + { 47, 44, 70, 81}, + { 35, 32, 47, 79}, + { 0, 47, 35, 60}, + { 50, 49, 49, 116}, + { 49, 47, 49, 117}, + { 45, 41, 49, 115}, + { 30, 32, 47, 106}, + { 50, 49, 46, 117}, + { 49, 49, 48, 115}, + { 50, 48, 49, 115}, + { 47, 45, 50, 113} + }, + { + { 67, 65, 96, 61}, + { 66, 65, 90, 62}, + { 68, 83, 79, 64}, + { 59, 102, 70, 61}, + { 67, 67, 93, 74}, + { 68, 67, 93, 74}, + { 64, 75, 88, 69}, + { 30, 85, 69, 
49}, + { 67, 67, 68, 97}, + { 68, 67, 68, 98}, + { 43, 60, 69, 80}, + { 4, 68, 65, 40}, + { 66, 66, 63, 101}, + { 62, 63, 62, 99}, + { 22, 49, 60, 76}, + { 0, 65, 64, 41} + }, + { + { 27, 18, 53, 22}, + { 26, 35, 22, 28}, + { 27, 90, 27, 27}, + { 28, 97, 29, 28}, + { 22, 19, 80, 29}, + { 15, 24, 42, 22}, + { 17, 72, 27, 20}, + { 22, 89, 28, 23}, + { 19, 22, 40, 82}, + { 6, 13, 36, 68}, + { 0, 19, 14, 53}, + { 8, 33, 10, 38}, + { 22, 26, 24, 95}, + { 13, 19, 25, 91}, + { 6, 14, 22, 87}, + { 4, 14, 13, 79} + }, + { + { 50, 48, 80, 49}, + { 50, 45, 73, 49}, + { 50, 48, 62, 49}, + { 49, 61, 54, 48}, + { 50, 50, 84, 48}, + { 50, 49, 84, 48}, + { 50, 48, 82, 49}, + { 50, 47, 76, 49}, + { 50, 50, 69, 64}, + { 51, 50, 77, 56}, + { 48, 48, 82, 50}, + { 37, 43, 81, 45}, + { 49, 48, 44, 87}, + { 40, 43, 50, 78}, + { 17, 32, 58, 58}, + { 0, 27, 64, 44} + }, + { + { 29, 26, 37, 36}, + { 24, 42, 16, 40}, + { 25, 62, 35, 28}, + { 28, 54, 51, 22}, + { 24, 29, 22, 52}, + { 9, 44, 21, 31}, + { 16, 50, 54, 14}, + { 27, 40, 65, 19}, + { 17, 36, 21, 49}, + { 0, 40, 41, 10}, + { 15, 37, 65, 9}, + { 28, 34, 66, 21}, + { 16, 41, 36, 33}, + { 6, 37, 54, 3}, + { 19, 32, 64, 12}, + { 28, 33, 62, 24} + }, + { + { 19, 20, 50, 19}, + { 19, 21, 49, 20}, + { 19, 27, 47, 19}, + { 19, 34, 43, 19}, + { 19, 22, 54, 18}, + { 19, 22, 55, 18}, + { 19, 22, 55, 18}, + { 19, 22, 53, 18}, + { 21, 22, 45, 27}, + { 19, 22, 47, 24}, + { 18, 21, 48, 23}, + { 18, 21, 47, 24}, + { 9, 14, 2, 65}, + { 3, 12, 2, 62}, + { 0, 11, 4, 59}, + { 0, 12, 6, 57} + } +}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipOffsetMatrix4x4[16]) = +{ 1, 28, 28, 42, 56, 22, 9, 6, 35, 14, 50, 66, 29, 50, 31, 19}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipShiftMatrix4x4[16]) = +{ 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix8x8[8][16][8]) = +{ + { + { 18, 77, 44, 26, 0, 17, 18, 19}, + { 19, 70, 83, 26, 16, 13, 16, 17}, + { 20, 40, 98, 36, 16, 16, 15, 16}, 
+ { 20, 29, 75, 68, 15, 17, 16, 16}, + { 19, 82, 61, 29, 14, 10, 12, 22}, + { 21, 59, 88, 30, 17, 13, 14, 17}, + { 23, 46, 93, 36, 16, 17, 14, 16}, + { 23, 33, 96, 43, 16, 18, 15, 17}, + { 19, 82, 64, 30, 16, 17, 8, 17}, + { 22, 61, 89, 29, 17, 17, 12, 15}, + { 24, 44, 94, 38, 17, 17, 15, 16}, + { 25, 34, 90, 47, 16, 19, 15, 17}, + { 19, 77, 61, 30, 15, 17, 20, 10}, + { 22, 63, 83, 30, 17, 16, 18, 10}, + { 24, 44, 89, 39, 17, 18, 16, 16}, + { 24, 35, 82, 50, 16, 19, 16, 17} + }, + { + { 14, 15, 10, 12, 99, 11, 12, 13}, + { 12, 61, 6, 9, 32, 8, 14, 13}, + { 12, 104, 54, 2, 9, 17, 11, 14}, + { 12, 24, 110, 33, 11, 18, 10, 14}, + { 14, 14, 15, 11, 69, 102, 6, 12}, + { 13, 13, 11, 10, 105, 30, 5, 14}, + { 12, 45, 6, 8, 55, 5, 13, 12}, + { 12, 72, 38, 6, 18, 14, 12, 13}, + { 14, 12, 18, 10, 2, 77, 102, 3}, + { 13, 12, 19, 8, 41, 122, 22, 7}, + { 13, 4, 18, 8, 97, 60, 0, 13}, + { 12, 16, 18, 9, 73, 17, 11, 12}, + { 14, 12, 19, 9, 10, 8, 67, 98}, + { 13, 12, 20, 8, 4, 57, 104, 20}, + { 13, 10, 22, 7, 29, 109, 37, 12}, + { 12, 5, 24, 9, 66, 70, 12, 17} + }, + { + { 23, 42, 18, 23, 18, 41, 20, 23}, + { 23, 86, 33, 20, 20, 33, 21, 23}, + { 23, 18, 90, 25, 22, 32, 20, 23}, + { 23, 25, 12, 94, 22, 33, 21, 23}, + { 23, 36, 21, 23, 0, 79, 31, 21}, + { 23, 68, 29, 21, 0, 67, 29, 21}, + { 23, 29, 70, 24, 5, 58, 28, 21}, + { 23, 22, 21, 78, 11, 50, 29, 22}, + { 22, 28, 23, 23, 18, 9, 97, 25}, + { 21, 35, 25, 22, 16, 8, 99, 22}, + { 19, 30, 36, 22, 15, 6, 100, 21}, + { 18, 22, 22, 45, 16, 5, 92, 23}, + { 19, 24, 23, 21, 20, 28, 4, 101}, + { 16, 23, 23, 20, 18, 28, 6, 99}, + { 11, 24, 22, 18, 16, 28, 8, 97}, + { 8, 21, 21, 23, 16, 25, 11, 93} + }, + { + { 45, 61, 48, 48, 60, 81, 44, 44}, + { 45, 46, 72, 53, 41, 84, 46, 43}, + { 45, 42, 60, 78, 38, 76, 50, 43}, + { 44, 46, 43, 94, 39, 63, 55, 45}, + { 44, 44, 53, 51, 43, 83, 68, 44}, + { 45, 33, 59, 63, 39, 68, 77, 44}, + { 44, 43, 42, 79, 38, 54, 85, 46}, + { 41, 46, 40, 82, 39, 44, 80, 52}, + { 44, 41, 49, 52, 43, 39, 98, 60}, + { 
41, 40, 46, 63, 43, 33, 92, 66}, + { 34, 43, 39, 70, 41, 30, 80, 75}, + { 26, 40, 41, 68, 35, 36, 62, 83}, + { 39, 42, 45, 52, 41, 43, 35, 117}, + { 26, 44, 41, 56, 38, 44, 30, 115}, + { 8, 36, 44, 53, 28, 51, 23, 108}, + { 0, 26, 48, 51, 22, 56, 21, 103} + }, + { + { 10, 15, 10, 11, 79, 18, 7, 10}, + { 10, 18, 15, 10, 79, 14, 8, 10}, + { 10, 15, 24, 13, 72, 12, 8, 9}, + { 10, 12, 8, 40, 59, 13, 8, 9}, + { 10, 10, 11, 10, 0, 80, 17, 7}, + { 10, 9, 11, 11, 4, 82, 12, 6}, + { 9, 11, 11, 11, 9, 81, 10, 6}, + { 9, 10, 12, 15, 15, 72, 12, 6}, + { 10, 11, 10, 10, 12, 2, 77, 14}, + { 10, 12, 11, 11, 11, 8, 76, 10}, + { 9, 11, 12, 11, 9, 15, 73, 7}, + { 9, 11, 11, 14, 9, 24, 63, 8}, + { 10, 11, 10, 11, 8, 12, 1, 82}, + { 10, 12, 11, 11, 9, 10, 8, 76}, + { 10, 11, 12, 12, 10, 7, 17, 69}, + { 9, 11, 10, 15, 10, 8, 25, 60} + }, + { + { 14, 34, 30, 13, 20, 58, 8, 14}, + { 13, 1, 44, 32, 7, 32, 33, 13}, + { 13, 12, 2, 56, 11, 11, 36, 33}, + { 12, 17, 5, 38, 13, 7, 23, 58}, + { 13, 7, 20, 22, 10, 21, 65, 18}, + { 11, 10, 6, 29, 13, 5, 39, 56}, + { 9, 17, 4, 20, 13, 9, 12, 83}, + { 8, 14, 15, 11, 12, 13, 9, 85}, + { 12, 14, 9, 19, 15, 7, 23, 74}, + { 8, 15, 10, 14, 13, 13, 0, 93}, + { 6, 14, 15, 9, 11, 15, 0, 93}, + { 6, 14, 16, 9, 10, 15, 5, 87}, + { 11, 14, 12, 15, 13, 15, 2, 89}, + { 8, 13, 15, 12, 11, 15, 3, 88}, + { 6, 14, 15, 11, 11, 16, 4, 87}, + { 6, 14, 15, 12, 11, 16, 6, 83} + }, + { + { 10, 6, 9, 11, 70, 11, 8, 9}, + { 9, 8, 6, 11, 49, 4, 10, 8}, + { 9, 38, 4, 11, 25, 6, 11, 8}, + { 9, 41, 22, 14, 15, 9, 11, 8}, + { 9, 8, 9, 11, 21, 68, 11, 7}, + { 9, 6, 7, 11, 49, 49, 4, 8}, + { 8, 9, 4, 11, 64, 23, 6, 8}, + { 9, 16, 7, 9, 52, 11, 11, 7}, + { 9, 10, 8, 10, 7, 19, 73, 5}, + { 8, 9, 7, 11, 4, 50, 51, 0}, + { 8, 9, 7, 11, 15, 66, 24, 3}, + { 9, 10, 6, 12, 33, 53, 14, 6}, + { 8, 9, 8, 10, 11, 5, 21, 67}, + { 7, 10, 6, 11, 11, 1, 55, 37}, + { 7, 11, 6, 11, 8, 13, 69, 15}, + { 8, 12, 6, 12, 11, 31, 52, 9} + }, + { + { 26, 43, 21, 27, 21, 46, 23, 26}, + { 26, 66, 67, 19, 25, 
27, 28, 26}, + { 27, 18, 64, 63, 27, 27, 27, 27}, + { 28, 28, 16, 101, 28, 27, 27, 28}, + { 25, 42, 27, 24, 0, 81, 39, 21}, + { 25, 39, 84, 23, 23, 32, 28, 25}, + { 27, 22, 40, 84, 28, 25, 27, 27}, + { 29, 28, 15, 102, 28, 28, 27, 28}, + { 24, 34, 37, 22, 21, 19, 87, 30}, + { 25, 22, 82, 38, 25, 29, 33, 25}, + { 28, 26, 27, 95, 27, 28, 25, 27}, + { 30, 27, 17, 101, 28, 28, 27, 28}, + { 24, 24, 42, 27, 27, 27, 12, 95}, + { 26, 19, 64, 55, 29, 28, 23, 39}, + { 29, 28, 26, 94, 27, 28, 27, 26}, + { 30, 28, 20, 98, 28, 28, 27, 30} + } +}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const short mipOffsetMatrix8x8[8]) = +{ 15, 14, 23, 45, 10, 14, 10, 27}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const short mipShiftMatrix8x8[8]) = +{ 7, 7, 6, 6, 6, 6, 6, 6}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix16x16[6][64][7]) = +{ + { + { 22, 13, 15, 50, 16, 17, 14}, + { 55, 5, 15, 25, 22, 16, 14}, + { 83, 7, 14, 18, 22, 17, 14}, + { 58, 47, 7, 15, 21, 17, 15}, + { 15, 81, 9, 12, 20, 18, 14}, + { 5, 61, 38, 12, 19, 18, 14}, + { 14, 20, 73, 13, 17, 18, 15}, + { 18, 4, 84, 16, 16, 19, 16}, + { 30, 12, 15, 57, 36, 15, 14}, + { 54, 8, 14, 44, 33, 18, 13}, + { 69, 14, 12, 29, 32, 20, 12}, + { 48, 46, 8, 18, 30, 22, 13}, + { 17, 68, 14, 11, 26, 24, 13}, + { 10, 47, 43, 10, 22, 24, 13}, + { 16, 16, 70, 12, 18, 24, 14}, + { 20, 6, 77, 15, 16, 24, 16}, + { 24, 15, 15, 28, 67, 15, 15}, + { 38, 14, 14, 34, 52, 23, 13}, + { 46, 21, 13, 29, 45, 27, 11}, + { 36, 38, 13, 19, 38, 31, 11}, + { 21, 46, 22, 13, 31, 34, 11}, + { 18, 32, 43, 10, 26, 34, 12}, + { 21, 13, 62, 11, 21, 33, 14}, + { 22, 7, 66, 13, 18, 31, 17}, + { 18, 17, 15, 12, 58, 40, 12}, + { 22, 19, 15, 18, 50, 42, 12}, + { 24, 23, 15, 19, 44, 45, 11}, + { 25, 27, 19, 16, 37, 47, 11}, + { 24, 26, 27, 13, 31, 48, 12}, + { 25, 18, 39, 11, 27, 46, 13}, + { 25, 12, 48, 10, 23, 43, 16}, + { 25, 11, 51, 11, 20, 40, 19}, + { 16, 17, 16, 14, 24, 71, 13}, + { 14, 19, 16, 15, 27, 67, 14}, + { 14, 20, 17, 15, 26, 66, 13}, + { 18, 18, 21, 
15, 24, 65, 13}, + { 24, 14, 27, 15, 22, 64, 14}, + { 27, 12, 31, 13, 20, 61, 16}, + { 27, 13, 35, 12, 19, 56, 19}, + { 26, 14, 37, 12, 18, 50, 23}, + { 15, 16, 16, 17, 8, 68, 31}, + { 13, 17, 17, 16, 8, 72, 26}, + { 13, 17, 18, 17, 8, 74, 23}, + { 16, 14, 21, 17, 8, 73, 22}, + { 21, 11, 24, 17, 9, 72, 22}, + { 25, 11, 24, 16, 9, 69, 24}, + { 26, 14, 25, 15, 10, 64, 26}, + { 25, 15, 28, 14, 12, 56, 30}, + { 14, 15, 16, 16, 9, 37, 62}, + { 14, 15, 17, 18, 4, 52, 51}, + { 15, 14, 18, 19, 1, 60, 44}, + { 17, 13, 19, 20, 0, 64, 41}, + { 20, 11, 20, 19, 1, 63, 40}, + { 22, 12, 20, 18, 3, 61, 40}, + { 23, 14, 20, 17, 5, 58, 40}, + { 23, 15, 22, 16, 8, 52, 42}, + { 16, 14, 17, 16, 12, 15, 81}, + { 17, 13, 17, 17, 6, 29, 71}, + { 18, 13, 18, 19, 3, 38, 64}, + { 18, 13, 18, 19, 1, 44, 60}, + { 19, 13, 17, 20, 2, 45, 58}, + { 20, 14, 17, 19, 3, 45, 56}, + { 21, 14, 18, 18, 5, 45, 55}, + { 22, 15, 19, 18, 7, 43, 53} + }, + { + { 42, 21, 26, 96, 39, 29, 20}, + { 42, 32, 28, 49, 66, 35, 21}, + { 30, 41, 30, 29, 73, 40, 23}, + { 23, 42, 35, 23, 72, 42, 26}, + { 25, 34, 44, 22, 68, 46, 26}, + { 26, 32, 49, 22, 65, 47, 25}, + { 24, 33, 53, 21, 62, 47, 25}, + { 22, 30, 62, 21, 57, 47, 25}, + { 27, 25, 27, 46, 97, 33, 22}, + { 11, 31, 31, 22, 94, 53, 22}, + { 0, 29, 35, 16, 89, 60, 26}, + { 6, 17, 41, 15, 86, 62, 28}, + { 20, 4, 42, 16, 84, 63, 28}, + { 26, 8, 35, 17, 84, 61, 28}, + { 23, 24, 25, 18, 83, 60, 27}, + { 21, 31, 27, 19, 77, 58, 28}, + { 16, 22, 27, 14, 107, 48, 24}, + { 11, 21, 32, 13, 84, 70, 26}, + { 11, 17, 36, 14, 77, 73, 29}, + { 13, 15, 35, 15, 77, 71, 31}, + { 16, 14, 34, 15, 80, 68, 30}, + { 17, 16, 31, 15, 84, 64, 30}, + { 19, 19, 29, 16, 85, 61, 29}, + { 19, 23, 30, 18, 81, 60, 29}, + { 17, 18, 28, 14, 81, 76, 24}, + { 15, 17, 31, 15, 71, 77, 33}, + { 15, 16, 33, 14, 70, 74, 36}, + { 15, 17, 32, 14, 73, 71, 36}, + { 15, 18, 32, 13, 77, 67, 36}, + { 15, 18, 32, 13, 81, 65, 34}, + { 16, 17, 32, 13, 82, 64, 32}, + { 18, 19, 32, 15, 78, 63, 32}, + { 18, 17, 28, 
18, 52, 97, 31}, + { 16, 16, 31, 15, 64, 76, 41}, + { 14, 17, 31, 14, 66, 72, 43}, + { 15, 17, 32, 13, 70, 70, 42}, + { 16, 18, 33, 11, 72, 70, 38}, + { 16, 18, 33, 11, 74, 71, 35}, + { 17, 17, 34, 13, 73, 72, 32}, + { 18, 19, 33, 14, 70, 72, 32}, + { 17, 19, 26, 16, 39, 87, 55}, + { 14, 19, 29, 13, 57, 78, 49}, + { 15, 18, 31, 12, 61, 76, 45}, + { 15, 18, 32, 13, 63, 76, 42}, + { 16, 17, 33, 13, 63, 79, 38}, + { 17, 15, 35, 13, 62, 82, 34}, + { 18, 15, 36, 13, 62, 82, 33}, + { 19, 18, 34, 15, 59, 81, 33}, + { 17, 19, 25, 15, 34, 59, 89}, + { 14, 19, 29, 13, 47, 80, 56}, + { 14, 19, 31, 12, 54, 84, 44}, + { 16, 18, 32, 13, 57, 83, 41}, + { 17, 17, 33, 14, 55, 85, 38}, + { 18, 15, 35, 14, 53, 87, 36}, + { 19, 14, 36, 15, 53, 86, 35}, + { 20, 17, 36, 17, 52, 81, 37}, + { 19, 19, 26, 17, 32, 42, 105}, + { 17, 19, 28, 15, 40, 75, 65}, + { 16, 19, 30, 14, 45, 84, 49}, + { 17, 18, 32, 14, 50, 83, 45}, + { 18, 17, 33, 16, 50, 82, 43}, + { 19, 16, 35, 16, 49, 84, 40}, + { 20, 15, 38, 17, 48, 83, 40}, + { 21, 17, 38, 18, 48, 78, 41} + }, + { + { 52, 46, 46, 55, 50, 46, 47}, + { 65, 45, 45, 42, 52, 47, 47}, + { 74, 48, 45, 40, 51, 48, 47}, + { 67, 61, 44, 39, 50, 48, 48}, + { 52, 74, 45, 39, 48, 48, 49}, + { 44, 75, 53, 39, 47, 48, 50}, + { 44, 57, 71, 40, 46, 46, 51}, + { 50, 31, 92, 42, 45, 41, 55}, + { 55, 45, 45, 48, 61, 48, 46}, + { 65, 46, 45, 36, 57, 53, 46}, + { 72, 50, 44, 31, 54, 54, 48}, + { 66, 61, 44, 31, 51, 54, 49}, + { 54, 73, 45, 32, 48, 52, 52}, + { 45, 76, 52, 33, 46, 49, 55}, + { 45, 59, 70, 36, 44, 44, 59}, + { 50, 30, 93, 40, 43, 37, 64}, + { 54, 46, 45, 39, 66, 53, 46}, + { 65, 46, 45, 31, 57, 60, 47}, + { 71, 50, 44, 26, 52, 61, 50}, + { 67, 60, 44, 26, 48, 58, 53}, + { 56, 72, 45, 27, 45, 53, 58}, + { 47, 76, 51, 29, 43, 47, 63}, + { 46, 59, 69, 33, 41, 39, 69}, + { 51, 29, 93, 37, 40, 30, 76}, + { 53, 46, 46, 35, 60, 64, 47}, + { 64, 47, 45, 27, 53, 65, 50}, + { 71, 50, 45, 22, 49, 62, 55}, + { 68, 59, 44, 22, 46, 57, 61}, + { 58, 71, 45, 24, 43, 
50, 67}, + { 48, 76, 51, 27, 41, 41, 74}, + { 46, 59, 69, 32, 39, 31, 82}, + { 51, 30, 93, 36, 38, 21, 88}, + { 52, 46, 46, 35, 51, 71, 50}, + { 63, 47, 45, 24, 51, 63, 58}, + { 70, 51, 45, 19, 49, 56, 65}, + { 68, 59, 44, 19, 45, 50, 72}, + { 58, 71, 45, 22, 42, 41, 80}, + { 48, 76, 51, 26, 39, 31, 88}, + { 46, 59, 69, 30, 37, 21, 95}, + { 51, 30, 93, 34, 36, 13, 100}, + { 52, 46, 46, 35, 47, 65, 61}, + { 62, 47, 45, 24, 49, 56, 69}, + { 69, 51, 45, 19, 47, 47, 77}, + { 68, 60, 44, 19, 44, 38, 85}, + { 59, 71, 45, 22, 40, 29, 94}, + { 49, 75, 51, 25, 37, 19, 102}, + { 47, 59, 69, 28, 35, 11, 108}, + { 52, 31, 93, 33, 34, 6, 110}, + { 52, 47, 46, 36, 46, 50, 77}, + { 61, 48, 45, 26, 46, 42, 84}, + { 67, 52, 45, 21, 44, 34, 92}, + { 66, 60, 44, 21, 41, 26, 100}, + { 59, 70, 46, 22, 38, 17, 108}, + { 50, 73, 53, 25, 35, 10, 114}, + { 48, 57, 70, 28, 33, 3, 118}, + { 52, 31, 92, 32, 32, 1, 118}, + { 52, 47, 46, 38, 45, 32, 93}, + { 59, 49, 46, 30, 43, 27, 99}, + { 65, 53, 46, 25, 41, 21, 106}, + { 64, 60, 46, 23, 38, 15, 111}, + { 58, 67, 48, 24, 36, 9, 117}, + { 51, 68, 56, 25, 34, 4, 121}, + { 49, 55, 72, 27, 32, 0, 123}, + { 52, 32, 92, 31, 30, 0, 123} + }, + { + { 18, 16, 16, 71, 13, 16, 16}, + { 36, 14, 16, 63, 18, 15, 17}, + { 64, 11, 16, 60, 20, 13, 17}, + { 64, 30, 13, 55, 22, 13, 17}, + { 33, 63, 9, 50, 25, 12, 18}, + { 15, 64, 24, 44, 27, 11, 18}, + { 19, 27, 59, 38, 30, 10, 18}, + { 24, 0, 81, 32, 31, 12, 18}, + { 22, 16, 16, 65, 46, 10, 17}, + { 25, 17, 16, 63, 50, 8, 18}, + { 35, 16, 16, 58, 54, 6, 19}, + { 41, 19, 16, 53, 58, 6, 19}, + { 36, 30, 15, 48, 62, 5, 19}, + { 28, 35, 20, 43, 63, 6, 19}, + { 26, 24, 35, 37, 62, 8, 18}, + { 26, 10, 50, 31, 59, 11, 19}, + { 18, 17, 16, 22, 81, 10, 18}, + { 18, 18, 16, 22, 81, 12, 19}, + { 19, 18, 16, 21, 81, 13, 19}, + { 22, 16, 17, 20, 80, 15, 18}, + { 25, 16, 17, 20, 77, 20, 17}, + { 26, 19, 17, 20, 73, 25, 16}, + { 25, 19, 20, 20, 68, 29, 16}, + { 24, 16, 27, 21, 60, 31, 17}, + { 16, 17, 16, 9, 59, 45, 14}, + { 
16, 17, 16, 10, 55, 49, 14}, + { 16, 17, 16, 12, 50, 54, 14}, + { 17, 16, 16, 13, 45, 59, 13}, + { 19, 15, 16, 15, 39, 63, 13}, + { 21, 16, 16, 17, 35, 66, 14}, + { 22, 17, 16, 18, 33, 65, 16}, + { 22, 17, 19, 18, 33, 59, 21}, + { 17, 16, 16, 15, 20, 78, 16}, + { 17, 16, 16, 16, 18, 77, 19}, + { 17, 16, 15, 17, 15, 77, 22}, + { 17, 16, 15, 17, 13, 76, 24}, + { 18, 16, 15, 18, 11, 75, 27}, + { 18, 16, 15, 19, 11, 71, 31}, + { 19, 17, 15, 19, 13, 65, 35}, + { 19, 17, 16, 18, 16, 57, 39}, + { 17, 16, 16, 18, 9, 61, 42}, + { 17, 16, 15, 18, 10, 55, 47}, + { 18, 16, 15, 18, 9, 51, 52}, + { 18, 17, 15, 18, 9, 47, 56}, + { 18, 17, 15, 18, 10, 43, 60}, + { 18, 17, 14, 18, 11, 39, 63}, + { 18, 18, 14, 17, 12, 36, 65}, + { 19, 17, 15, 18, 13, 35, 65}, + { 16, 16, 15, 16, 14, 24, 75}, + { 17, 17, 15, 17, 14, 21, 78}, + { 17, 17, 15, 17, 14, 19, 80}, + { 18, 17, 15, 17, 13, 17, 82}, + { 18, 18, 14, 17, 13, 16, 84}, + { 18, 18, 15, 17, 13, 15, 84}, + { 19, 17, 15, 17, 13, 16, 84}, + { 19, 17, 16, 17, 14, 18, 81}, + { 16, 16, 16, 16, 16, 8, 89}, + { 17, 16, 16, 16, 16, 8, 89}, + { 18, 17, 15, 16, 15, 8, 90}, + { 18, 18, 15, 16, 15, 8, 90}, + { 19, 18, 15, 17, 15, 8, 90}, + { 19, 18, 15, 17, 15, 7, 90}, + { 19, 17, 16, 17, 14, 8, 90}, + { 19, 17, 17, 17, 15, 10, 87} + }, + { + { 29, 12, 13, 53, 18, 11, 15}, + { 56, 12, 13, 24, 32, 7, 16}, + { 54, 32, 12, 12, 33, 11, 15}, + { 21, 61, 15, 8, 27, 18, 13}, + { 5, 54, 36, 8, 20, 22, 13}, + { 10, 21, 64, 9, 15, 24, 16}, + { 16, 2, 75, 11, 11, 23, 23}, + { 16, 2, 68, 12, 9, 19, 33}, + { 29, 16, 14, 41, 56, 2, 17}, + { 28, 29, 15, 18, 56, 10, 15}, + { 14, 42, 21, 7, 42, 24, 13}, + { 6, 36, 37, 7, 25, 35, 14}, + { 11, 14, 55, 10, 13, 37, 21}, + { 16, 0, 61, 11, 8, 33, 31}, + { 16, 0, 56, 11, 6, 26, 43}, + { 15, 6, 46, 12, 7, 20, 53}, + { 13, 22, 16, 9, 73, 17, 14}, + { 8, 28, 22, 9, 46, 40, 11}, + { 6, 23, 32, 11, 21, 53, 15}, + { 11, 9, 43, 13, 9, 50, 27}, + { 16, 0, 46, 14, 4, 40, 42}, + { 17, 2, 41, 14, 5, 28, 55}, + { 15, 7, 34, 13, 
6, 20, 65}, + { 14, 11, 29, 12, 9, 15, 69}, + { 10, 18, 19, 8, 35, 62, 9}, + { 9, 16, 25, 15, 13, 67, 18}, + { 12, 9, 32, 17, 4, 54, 36}, + { 15, 5, 34, 17, 4, 36, 53}, + { 16, 4, 31, 15, 7, 22, 67}, + { 16, 8, 27, 15, 9, 14, 75}, + { 14, 12, 22, 13, 10, 11, 79}, + { 12, 14, 21, 12, 12, 10, 78}, + { 12, 13, 19, 16, 6, 73, 23}, + { 13, 10, 24, 17, 4, 49, 46}, + { 14, 7, 26, 17, 7, 27, 65}, + { 15, 7, 25, 16, 10, 13, 77}, + { 15, 9, 22, 15, 12, 7, 83}, + { 14, 12, 19, 14, 13, 5, 85}, + { 13, 14, 18, 13, 13, 5, 85}, + { 12, 14, 18, 12, 14, 7, 82}, + { 14, 12, 19, 16, 7, 40, 56}, + { 14, 9, 22, 15, 11, 17, 74}, + { 14, 8, 22, 15, 13, 6, 84}, + { 14, 9, 21, 15, 14, 2, 87}, + { 13, 11, 18, 14, 14, 2, 88}, + { 13, 13, 17, 14, 15, 2, 88}, + { 12, 14, 16, 13, 16, 3, 87}, + { 12, 15, 16, 12, 15, 6, 83}, + { 14, 11, 18, 14, 14, 10, 81}, + { 14, 9, 19, 14, 15, 3, 87}, + { 14, 10, 19, 14, 15, 0, 90}, + { 13, 11, 17, 14, 15, 0, 90}, + { 13, 12, 17, 14, 15, 1, 89}, + { 13, 14, 16, 13, 16, 2, 88}, + { 12, 15, 15, 12, 16, 3, 86}, + { 12, 14, 16, 13, 16, 6, 83}, + { 13, 11, 17, 14, 16, 2, 87}, + { 14, 11, 18, 14, 16, 1, 88}, + { 14, 11, 17, 14, 15, 2, 88}, + { 13, 13, 17, 14, 15, 3, 87}, + { 13, 13, 17, 14, 15, 3, 86}, + { 13, 14, 16, 13, 16, 4, 85}, + { 12, 15, 16, 12, 16, 5, 83}, + { 12, 15, 16, 13, 15, 7, 81} + }, + { + { 11, 11, 11, 53, 9, 12, 11}, + { 18, 10, 10, 32, 13, 10, 11}, + { 40, 8, 11, 20, 13, 11, 11}, + { 56, 14, 9, 16, 13, 12, 10}, + { 47, 37, 6, 13, 12, 12, 10}, + { 23, 60, 10, 12, 13, 11, 11}, + { 10, 50, 33, 11, 12, 11, 11}, + { 13, 15, 66, 12, 12, 11, 11}, + { 15, 11, 11, 62, 26, 9, 10}, + { 13, 12, 10, 57, 17, 11, 10}, + { 17, 12, 10, 43, 15, 11, 10}, + { 34, 8, 11, 30, 14, 12, 10}, + { 49, 12, 10, 22, 13, 12, 10}, + { 45, 31, 7, 17, 13, 12, 10}, + { 28, 49, 11, 14, 13, 12, 10}, + { 17, 41, 31, 13, 13, 11, 10}, + { 14, 11, 11, 33, 59, 6, 11}, + { 13, 11, 11, 48, 42, 8, 10}, + { 12, 12, 11, 51, 30, 9, 10}, + { 18, 10, 11, 45, 22, 11, 10}, + { 31, 8, 11, 37, 17, 
12, 10}, + { 41, 13, 10, 28, 15, 12, 9}, + { 39, 27, 8, 21, 15, 12, 10}, + { 29, 36, 15, 17, 15, 12, 10}, + { 11, 12, 11, 11, 63, 26, 8}, + { 12, 11, 11, 23, 61, 16, 10}, + { 12, 12, 11, 34, 53, 10, 10}, + { 13, 12, 11, 41, 43, 9, 10}, + { 18, 10, 11, 42, 33, 9, 10}, + { 27, 9, 11, 37, 26, 10, 10}, + { 33, 14, 10, 31, 22, 11, 10}, + { 32, 23, 12, 24, 20, 11, 10}, + { 11, 12, 11, 10, 32, 61, 5}, + { 11, 12, 11, 11, 49, 42, 6}, + { 11, 12, 11, 17, 58, 26, 8}, + { 11, 12, 11, 25, 58, 16, 9}, + { 13, 11, 11, 32, 52, 11, 9}, + { 17, 10, 11, 35, 43, 10, 10}, + { 23, 11, 10, 34, 35, 10, 10}, + { 27, 15, 11, 29, 29, 12, 9}, + { 11, 12, 11, 13, 9, 70, 17}, + { 11, 11, 11, 12, 23, 65, 10}, + { 11, 12, 10, 12, 38, 53, 6}, + { 11, 12, 10, 14, 51, 37, 7}, + { 11, 11, 11, 20, 56, 24, 8}, + { 12, 11, 11, 26, 53, 17, 9}, + { 16, 11, 11, 30, 47, 14, 9}, + { 20, 13, 11, 30, 38, 14, 10}, + { 11, 11, 10, 13, 9, 35, 52}, + { 11, 11, 10, 13, 11, 51, 34}, + { 11, 11, 10, 13, 20, 57, 20}, + { 11, 12, 10, 13, 33, 52, 12}, + { 10, 12, 10, 14, 44, 42, 10}, + { 11, 12, 10, 18, 49, 32, 9}, + { 12, 12, 11, 23, 49, 24, 10}, + { 15, 13, 11, 25, 43, 21, 10}, + { 11, 11, 11, 11, 16, 0, 81}, + { 10, 12, 10, 12, 16, 16, 65}, + { 11, 11, 10, 13, 17, 33, 47}, + { 11, 12, 10, 13, 22, 43, 31}, + { 11, 12, 10, 13, 29, 45, 21}, + { 11, 12, 11, 15, 37, 40, 16}, + { 11, 12, 11, 18, 41, 34, 14}, + { 12, 13, 12, 20, 40, 28, 14} + } +}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipOffsetMatrix16x16[6]) = +{ 15, 19, 46, 16, 14, 11}; + +ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipShiftMatrix16x16[6]) = +{ 6, 7, 5, 6, 6, 6}; diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h index 020323575ff4e0d2cb70e25c9e827457aa7a29cb..20059c29ce0e150cb974be3494775170ce0383b1 100644 --- a/source/Lib/CommonLib/MotionInfo.h +++ b/source/Lib/CommonLib/MotionInfo.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,14 +103,15 @@ struct MotionInfo bool isInter; bool isIBCmot; char interDir; + bool useAltHpelIf; uint16_t sliceIdx; Mv mv [ NUM_REF_PIC_LIST_01 ]; int16_t refIdx [ NUM_REF_PIC_LIST_01 ]; - uint8_t GBiIdx; + uint8_t BcwIdx; Mv bv; - MotionInfo() : isInter(false), isIBCmot(false), interDir(0), sliceIdx(0), refIdx{ NOT_VALID, NOT_VALID }, GBiIdx(0) { } + MotionInfo() : isInter(false), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ NOT_VALID, NOT_VALID }, BcwIdx(0) { } // ensure that MotionInfo(0) produces '\x000....' bit pattern - needed to work with AreaBuf - don't use this constructor for anything else - MotionInfo(int i) : isInter(i != 0), isIBCmot(false), interDir(0), sliceIdx(0), refIdx{ 0, 0 }, GBiIdx(0) { CHECKD(i != 0, "The argument for this constructor has to be '0'"); } + MotionInfo(int i) : isInter(i != 0), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ 0, 0 }, BcwIdx(0) { CHECKD(i != 0, "The argument for this constructor has to be '0'"); } bool operator==( const MotionInfo& mi ) const { @@ -143,7 +144,7 @@ struct MotionInfo } }; -class GBiMotionParam +class BcwMotionParam { bool m_readOnly[2][33]; // 2 RefLists, 33 RefFrams Mv m_mv[2][33]; @@ -221,7 +222,10 @@ struct LutMotionCand { static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lut; static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutIbc; - static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShare; - static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShareIbc; +}; +struct PatentBvCand +{ + Mv m_bvCands[IBC_NUM_CANDIDATES]; + int currCnt; }; #endif // __MOTIONINFO__ diff --git a/source/Lib/CommonLib/Mv.cpp b/source/Lib/CommonLib/Mv.cpp index 732e756b5ff21ed995286ef3344c52670bdc348a..386d0874680f79432ae174c44271c0a8cc11491b 100644 --- a/source/Lib/CommonLib/Mv.cpp +++ 
b/source/Lib/CommonLib/Mv.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,45 +40,64 @@ #include "Common.h" #include "Slice.h" -const MvPrecision Mv::m_amvrPrecision[3] = { MV_PRECISION_QUARTER, MV_PRECISION_INT, MV_PRECISION_4PEL }; // for cu.imv=0, 1 and 2 +const MvPrecision Mv::m_amvrPrecision[4] = { MV_PRECISION_QUARTER, MV_PRECISION_INT, MV_PRECISION_4PEL, MV_PRECISION_HALF }; // for cu.imv=0, 1, 2 and 3 +const MvPrecision Mv::m_amvrPrecAffine[3] = { MV_PRECISION_QUARTER, MV_PRECISION_SIXTEENTH, MV_PRECISION_INT }; // for cu.imv=0, 1 and 2 +const MvPrecision Mv::m_amvrPrecIbc[3] = { MV_PRECISION_INT, MV_PRECISION_INT, MV_PRECISION_4PEL }; // for cu.imv=0, 1 and 2 void roundAffineMv( int& mvx, int& mvy, int nShift ) { const int nOffset = 1 << (nShift - 1); - mvx = mvx >= 0 ? (mvx + nOffset) >> nShift : -((-mvx + nOffset) >> nShift); - mvy = mvy >= 0 ? 
(mvy + nOffset) >> nShift : -((-mvy + nOffset) >> nShift); + mvx = (mvx + nOffset - (mvx >= 0)) >> nShift; + mvy = (mvy + nOffset - (mvy >= 0)) >> nShift; } -void clipMv( Mv& rcMv, const Position& pos, - const struct Size& size, - const SPS& sps ) +void clipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps, const PPS& pps ) { + if (sps.getWrapAroundEnabledFlag()) + { + wrapClipMv(rcMv, pos, size, &sps, &pps); + return; + } + int iMvShift = MV_FRACTIONAL_BITS_INTERNAL; int iOffset = 8; - int iHorMax = ( sps.getPicWidthInLumaSamples() + iOffset - ( int ) pos.x - 1 ) << iMvShift; + int iHorMax = ( pps.getPicWidthInLumaSamples() + iOffset - (int)pos.x - 1 ) << iMvShift; int iHorMin = ( -( int ) sps.getMaxCUWidth() - iOffset - ( int ) pos.x + 1 ) << iMvShift; - int iVerMax = ( sps.getPicHeightInLumaSamples() + iOffset - ( int ) pos.y - 1 ) << iMvShift; + int iVerMax = ( pps.getPicHeightInLumaSamples() + iOffset - (int)pos.y - 1 ) << iMvShift; int iVerMin = ( -( int ) sps.getMaxCUHeight() - iOffset - ( int ) pos.y + 1 ) << iMvShift; - if( sps.getWrapAroundEnabledFlag() ) + rcMv.setHor( std::min( iHorMax, std::max( iHorMin, rcMv.getHor() ) ) ); + rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) ); +} + +bool wrapClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS *sps, const PPS *pps ) +{ + bool wrapRef = true; + int iMvShift = MV_FRACTIONAL_BITS_INTERNAL; + int iOffset = 8; + int iHorMax = ( pps->getPicWidthInLumaSamples() + sps->getMaxCUWidth() - size.width + iOffset - (int)pos.x - 1 ) << iMvShift; + int iHorMin = ( -( int ) sps->getMaxCUWidth() - iOffset - ( int ) pos.x + 1 ) << iMvShift; + int iVerMax = ( pps->getPicHeightInLumaSamples() + iOffset - (int)pos.y - 1 ) << iMvShift; + int iVerMin = ( -( int ) sps->getMaxCUHeight() - iOffset - ( int ) pos.y + 1 ) << iMvShift; + int mvX = rcMv.getHor(); + + if(mvX > iHorMax) { - int iHorMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - 
size.width + iOffset - ( int ) pos.x - 1 ) << iMvShift; - int iHorMin = ( -( int ) sps.getMaxCUWidth() - iOffset - ( int ) pos.x + 1 ) << iMvShift; - int mvX = rcMv.getHor(); - while( mvX > iHorMax ) { - mvX -= ( sps.getWrapAroundOffset() << iMvShift ); - } - while( mvX < iHorMin ) { - mvX += ( sps.getWrapAroundOffset() << iMvShift ); - } - rcMv.setHor( mvX ); - rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) ); - return; + mvX -= ( sps->getWrapAroundOffset() << iMvShift ); + mvX = std::min( iHorMax, std::max( iHorMin, mvX ) ); + wrapRef = false; + } + if(mvX < iHorMin) + { + mvX += ( sps->getWrapAroundOffset() << iMvShift ); + mvX = std::min( iHorMax, std::max( iHorMin, mvX ) ); + wrapRef = false; } - rcMv.setHor( std::min( iHorMax, std::max( iHorMin, rcMv.getHor() ) ) ); + rcMv.setHor( mvX ); rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) ); + return wrapRef; } //! \} diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h index 51d08d6822ae026c4a8f1725d8b66824cb6e2fdb..e06db56a94c67556308ff24f48bc10e77eb4f55c 100644 --- a/source/Lib/CommonLib/Mv.h +++ b/source/Lib/CommonLib/Mv.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,14 +53,20 @@ enum MvPrecision MV_PRECISION_INT = 2, // 1-pel, shift 2 bits from 4-pel MV_PRECISION_HALF = 3, // 1/2-pel MV_PRECISION_QUARTER = 4, // 1/4-pel (the precision of regular MV difference signaling), shift 4 bits from 4-pel - MV_PRECISION_INTERNAL = 6, // 1/16-pel (the precision of internal MV), shift 6 bits from 4-pel + MV_PRECISION_SIXTEENTH = 6, // 1/16-pel (the precision of internal MV), shift 6 bits from 4-pel + MV_PRECISION_INTERNAL = 2 + MV_FRACTIONAL_BITS_INTERNAL, }; /// basic motion vector class class Mv { private: - static const MvPrecision m_amvrPrecision[3]; + static const MvPrecision m_amvrPrecision[4]; + static const MvPrecision m_amvrPrecAffine[3]; + static const MvPrecision m_amvrPrecIbc[3]; + + static const int mvClipPeriod = (1 << MV_BITS); + static const int halMvClipPeriod = (1 << (MV_BITS - 1)); public: int hor; ///< horizontal component of motion vector @@ -121,13 +127,12 @@ public: //! shift right with rounding void divideByPowerOf2 (const int i) { -#if ME_ENABLE_ROUNDING_OF_MVS - const int offset = (i == 0) ? 
0 : 1 << (i - 1); - hor += offset; - ver += offset; -#endif - hor >>= i; - ver >>= i; + if (i != 0) + { + const int offset = (1 << (i - 1)); + hor = (hor + offset - (hor >= 0)) >> i; + ver = (ver + offset - (ver >= 0)) >> i; + } } const Mv& operator<<= (const int i) @@ -139,8 +144,12 @@ public: const Mv& operator>>= ( const int i ) { - hor >>= i; - ver >>= i; + if (i != 0) + { + const int offset = (1 << (i - 1)); + hor = (hor + offset - (hor >= 0)) >> i; + ver = (ver + offset - (ver >= 0)) >> i; + } return *this; } @@ -166,8 +175,8 @@ public: const Mv scaleMv( int iScale ) const { - const int mvx = Clip3( -131072, 131071, (iScale * getHor() + 127 + (iScale * getHor() < 0)) >> 8 ); - const int mvy = Clip3( -131072, 131071, (iScale * getVer() + 127 + (iScale * getVer() < 0)) >> 8 ); + const int mvx = Clip3(MV_MIN, MV_MAX, (iScale * getHor() + 128 - (iScale * getHor() >= 0)) >> 8); + const int mvy = Clip3(MV_MIN, MV_MAX, (iScale * getVer() + 128 - (iScale * getVer() >= 0)) >> 8); return Mv( mvx, mvy ); } @@ -182,27 +191,66 @@ public: { const int rightShift = -shift; const int nOffset = 1 << (rightShift - 1); - hor = hor >= 0 ? (hor + nOffset) >> rightShift : -((-hor + nOffset) >> rightShift); - ver = ver >= 0 ? (ver + nOffset) >> rightShift : -((-ver + nOffset) >> rightShift); + hor = hor >= 0 ? (hor + nOffset - 1) >> rightShift : (hor + nOffset) >> rightShift; + ver = ver >= 0 ? 
(ver + nOffset - 1) >> rightShift : (ver + nOffset) >> rightShift; } } - void changePrecisionAmvr(const int amvr, const MvPrecision& dst) - { - changePrecision(m_amvrPrecision[amvr], dst); - } - void roundToPrecision(const MvPrecision& src, const MvPrecision& dst) { changePrecision(src, dst); changePrecision(dst, src); } - void roundToAmvrSignalPrecision(const MvPrecision& src, const int amvr) + // translational MV + void changeTransPrecInternal2Amvr(const int amvr) + { + changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecision[amvr]); + } + + void changeTransPrecAmvr2Internal(const int amvr) + { + changePrecision(m_amvrPrecision[amvr], MV_PRECISION_INTERNAL); + } + + void roundTransPrecInternal2Amvr(const int amvr) { - roundToPrecision(src, m_amvrPrecision[amvr]); + roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecision[amvr]); } + // affine MV + void changeAffinePrecInternal2Amvr(const int amvr) + { + changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecAffine[amvr]); + } + + void changeAffinePrecAmvr2Internal(const int amvr) + { + changePrecision(m_amvrPrecAffine[amvr], MV_PRECISION_INTERNAL); + } + + void roundAffinePrecInternal2Amvr(const int amvr) + { + roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecAffine[amvr]); + } + + // IBC block vector + void changeIbcPrecInternal2Amvr(const int amvr) + { + changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecIbc[amvr]); + } + + void changeIbcPrecAmvr2Internal(const int amvr) + { + changePrecision(m_amvrPrecIbc[amvr], MV_PRECISION_INTERNAL); + } + + void roundIbcPrecInternal2Amvr(const int amvr) + { + roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecIbc[amvr]); + } + + Mv getSymmvdMv(const Mv& curMvPred, const Mv& tarMvPred) { return Mv(tarMvPred.hor - hor + curMvPred.hor, tarMvPred.ver - ver + curMvPred.ver); @@ -213,6 +261,13 @@ public: hor = Clip3( -(1 << 17), (1 << 17) - 1, hor ); ver = Clip3( -(1 << 17), (1 << 17) - 1, ver ); } + void mvCliptoStorageBitDepth() // periodic clipping + { + hor = (hor + mvClipPeriod) & 
(mvClipPeriod - 1); + hor = (hor >= halMvClipPeriod) ? (hor - mvClipPeriod) : hor; + ver = (ver + mvClipPeriod) & (mvClipPeriod - 1); + ver = (ver >= halMvClipPeriod) ? (ver - mvClipPeriod) : ver; + } };// END CLASS DEFINITION MV namespace std @@ -228,7 +283,15 @@ namespace std }; void clipMv ( Mv& rcMv, const struct Position& pos, const struct Size& size, - const class SPS& sps ); + const class SPS& sps + , const class PPS& pps +); + +bool wrapClipMv( Mv& rcMv, const Position& pos, + const struct Size& size, + const SPS *sps + , const PPS* pps +); void roundAffineMv( int& mvx, int& mvy, int nShift ); diff --git a/source/Lib/CommonLib/NAL.h b/source/Lib/CommonLib/NAL.h index 57f98f7812e751f783c99c28c97d7bfbebfa5374..9e167bc790e85c3c2fc1cf49af324a887f54442c 100644 --- a/source/Lib/CommonLib/NAL.h +++ b/source/Lib/CommonLib/NAL.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,20 +50,32 @@ struct NALUnit NalUnitType m_nalUnitType; ///< nal_unit_type uint32_t m_temporalId; ///< temporal_id uint32_t m_nuhLayerId; ///< nuh_layer_id + uint32_t m_forbiddenZeroBit; + uint32_t m_nuhReservedZeroBit; NALUnit(const NALUnit &src) :m_nalUnitType (src.m_nalUnitType) ,m_temporalId (src.m_temporalId) ,m_nuhLayerId (src.m_nuhLayerId) + , m_forbiddenZeroBit(src.m_forbiddenZeroBit) + , m_nuhReservedZeroBit(src.m_nuhReservedZeroBit) { } /** construct an NALunit structure with given header values. 
*/ NALUnit( NalUnitType nalUnitType, int temporalId = 0, + uint32_t nuhReservedZeroBit = 0, + uint32_t forbiddenZeroBit = 0, int nuhLayerId = 0) :m_nalUnitType (nalUnitType) ,m_temporalId (temporalId) ,m_nuhLayerId (nuhLayerId) +#if JVET_O0179_PROPOSALB + , m_forbiddenZeroBit(forbiddenZeroBit) + , m_nuhReservedZeroBit(nuhReservedZeroBit) +#endif + + {} /** default constructor - no initialization; must be performed by user */ @@ -74,32 +86,14 @@ struct NALUnit /** returns true if the NALunit is a slice NALunit */ bool isSlice() { -#if JVET_M0101_HLS return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA + || m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL; -#else - return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL_R - || m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL_N - || m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_R - || m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N - || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_R - || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_N - || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP - || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP - || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA - || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N - || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R - || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N - || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R; -#endif } bool isSei() { @@ -109,7 +103,14 @@ struct NALUnit bool isVcl() { - return ( (uint32_t)m_nalUnitType < 32 ); + return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL + || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA + || 
m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL + || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL + || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL + || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP + || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA + || m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR; } }; @@ -148,6 +149,7 @@ struct NALUnitEBSP : public NALUnit class AccessUnit : public std::list<NALUnitEBSP*> // NOTE: Should not inherit from STL. { public: + int temporalId; ~AccessUnit() { for (AccessUnit::iterator it = this->begin(); it != this->end(); it++) diff --git a/source/Lib/CommonLib/PicYuvMD5.cpp b/source/Lib/CommonLib/PicYuvMD5.cpp index 323b3d888b6c6cada8bfc0479a781f6a32a08d62..febcddaa654c5577514d2f4fb339d8981e30a4c0 100644 --- a/source/Lib/CommonLib/PicYuvMD5.cpp +++ b/source/Lib/CommonLib/PicYuvMD5.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index 3736daac0c35d20f22b3ef186b867dce329e2888..387ec60c54ce03b303447138e5c0345850268eed 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,93 +38,10 @@ #include "Picture.h" #include "SEI.h" #include "ChromaFormat.h" -#if ENABLE_WPP_PARALLELISM -#if ENABLE_WPP_STATIC_LINK -#include <atomic> -#else -#include <condition_variable> -#endif -#endif - - -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM -#if ENABLE_WPP_PARALLELISM -#if ENABLE_WPP_STATIC_LINK -class SyncObj -{ -public: - SyncObj() : m_Val(-1) {} - ~SyncObj() {} - - void reset() - { - m_Val = -1; - } - - bool isReady( int64_t val ) const - { -// std::cout << "is ready m_Val " << m_Val << " val " << val << std::endl; - return m_Val >= val; - } +#include "CommonLib/InterpolationFilter.h" - void wait( int64_t idx, int ctuPosY ) - { - while( ! isReady( idx ) ) - { - } - } - - void set( int64_t val, int ctuPosY) - { - m_Val = val; - } - -private: - std::atomic<int> m_Val; -}; -#else -class SyncObj -{ -public: - SyncObj() : m_Val(-1) {} - ~SyncObj() {} - void reset() - { - std::unique_lock< std::mutex > lock( m_mutex ); - - m_Val = -1; - } - - bool isReady( int64_t val ) const - { - return m_Val >= val; - } - - void wait( int64_t idx, int ctuPosY ) - { - std::unique_lock< std::mutex > lock( m_mutex ); - - while( ! 
isReady( idx ) ) - { - m_cv.wait( lock ); - } - } - - void set( int64_t val, int ctuPosY) - { - std::unique_lock< std::mutex > lock( m_mutex ); - m_Val = val; - m_cv.notify_all(); - } - -private: - int64_t m_Val; - std::condition_variable m_cv; - std::mutex m_mutex; -}; -#endif -#endif +#if ENABLE_SPLIT_PARALLELISM int g_wppThreadId( 0 ); #pragma omp threadprivate(g_wppThreadId) @@ -138,13 +55,6 @@ int g_splitJobId( 0 ); #endif Scheduler::Scheduler() : -#if ENABLE_WPP_PARALLELISM - m_numWppThreads( 1 ), - m_numWppDataInstances( 1 ) -#endif -#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM - , -#endif #if ENABLE_SPLIT_PARALLELISM m_numSplitThreads( 1 ) #endif @@ -153,13 +63,6 @@ Scheduler::Scheduler() : Scheduler::~Scheduler() { -#if ENABLE_WPP_PARALLELISM - for( auto & so : m_SyncObjs ) - { - delete so; - } - m_SyncObjs.clear(); -#endif } #if ENABLE_SPLIT_PARALLELISM @@ -227,37 +130,6 @@ void Scheduler::setSplitThreadId( const int tId ) #endif -#if ENABLE_WPP_PARALLELISM -unsigned Scheduler::getWppDataId( int lID ) const -{ - const int tId = lID == CURR_THREAD_ID ? g_wppThreadId : lID; - -#if ENABLE_SPLIT_PARALLELISM - if( m_numSplitThreads > 1 ) - { - return tId * NUM_RESERVERD_SPLIT_JOBS; - } - else - { - return tId; - } -#else - return tId; -#endif -} - -unsigned Scheduler::getWppThreadId() const -{ - return g_wppThreadId; -} - -void Scheduler::setWppThreadId( const int tId ) -{ - g_wppThreadId = tId == CURR_THREAD_ID ? omp_get_thread_num() : tId; - - CHECK( g_wppThreadId >= PARL_WPP_MAX_NUM_THREADS, "The WPP thread ID " << g_wppThreadId << " is invalid!" 
); -} -#endif unsigned Scheduler::getDataId() const { @@ -266,12 +138,6 @@ unsigned Scheduler::getDataId() const { return getSplitDataId(); } -#endif -#if ENABLE_WPP_PARALLELISM - if( m_numWppThreads > 1 ) - { - return getWppDataId(); - } #endif return 0; } @@ -281,44 +147,6 @@ bool Scheduler::init( const int ctuYsize, const int ctuXsize, const int numWppTh #if ENABLE_SPLIT_PARALLELISM m_numSplitThreads = numSplitThreads; #endif -#if ENABLE_WPP_PARALLELISM - m_firstNonFinishedLine = 0; - m_numWppThreadsRunning = 1; - m_numWppDataInstances = numWppThreadsRunning+numWppExtraLines; - m_numWppThreads = numWppThreadsRunning; - m_ctuYsize = ctuYsize; - m_ctuXsize = ctuXsize; - - if( m_SyncObjs.size() == 0 ) - { - m_SyncObjs.reserve( ctuYsize ); - for( int i = (int)m_SyncObjs.size(); i < ctuYsize; i++ ) - { - m_SyncObjs.push_back( new SyncObj ); - } - } - else - { - CHECK( m_SyncObjs.size() != ctuYsize, ""); - } - - for( int i = 0; i < ctuYsize; i++ ) - { - m_SyncObjs[i]->reset(); - } - - if( m_numWppThreads != m_numWppDataInstances ) - { - m_LineDone.clear(); - m_LineDone.resize(ctuYsize, -1); - - m_LineProc.clear(); - m_LineProc.resize(ctuYsize, false); - - m_SyncObjs[0]->set(0,0); - m_LineProc[0]=true; - } -#endif return true; } @@ -328,107 +156,11 @@ int Scheduler::getNumPicInstances() const { #if !ENABLE_SPLIT_PARALLELISM return 1; -#elif !ENABLE_WPP_PARALLELISM - return ( m_numSplitThreads > 1 ? m_numSplitThreads : 1 ); #else - return m_numSplitThreads > 1 ? m_numWppDataInstances * m_numSplitThreads : 1; + return ( m_numSplitThreads > 1 ? 
m_numSplitThreads : 1 ); #endif } -#if ENABLE_WPP_PARALLELISM -void Scheduler::wait( const int ctuPosX, const int ctuPosY ) -{ - if( m_numWppThreads == m_numWppDataInstances ) - { - if( ctuPosY > 0 && ctuPosX+1 < m_ctuXsize) - { - m_SyncObjs[ctuPosY-1]->wait( ctuPosX+1, ctuPosY-1 ); - } - return; - } - - m_SyncObjs[ctuPosY]->wait( ctuPosX, ctuPosY ); -} - -void Scheduler::setReady(const int ctuPosX, const int ctuPosY) -{ - if( m_numWppThreads == m_numWppDataInstances ) - { - m_SyncObjs[ctuPosY]->set( ctuPosX, ctuPosY); - return; - } - - std::unique_lock< std::mutex > lock( m_mutex ); - - if( ctuPosX+1 == m_ctuXsize ) - { - m_LineProc[ctuPosY] = true; //prevent line from be further evaluated - m_LineDone[ctuPosY] = std::numeric_limits<int>::max(); - m_firstNonFinishedLine = ctuPosY+1; - } - else - { - m_LineDone[ctuPosY] = ctuPosX; - m_LineProc[ctuPosY] = false; // mark currently not processed - } - - int lastLine = m_firstNonFinishedLine + m_numWppDataInstances; - lastLine = std::min( m_ctuYsize, lastLine )-1-m_firstNonFinishedLine; - - m_numWppThreadsRunning--; - - Position pos; - //if the current encoder is the last - const bool c1 = (ctuPosY == m_firstNonFinishedLine + m_numWppThreads - 1); - const bool c2 = (ctuPosY+1 <= m_firstNonFinishedLine+lastLine); - const bool c3 = (ctuPosX >= m_ctuXsize/4); - if( c1 && c2 && c3 && getNextCtu( pos, ctuPosY+1, 4 ) ) - { - // try to continue in the next row - // go on in the current line - m_SyncObjs[pos.y]->set(pos.x, pos.y); - m_numWppThreadsRunning++; - } - else if( getNextCtu( pos, ctuPosY, 1 ) ) - { - // try to continue in the same row - // go on in the current line - m_SyncObjs[pos.y]->set(pos.x, pos.y); - m_numWppThreadsRunning++; - } - for( int i = m_numWppThreadsRunning; i < m_numWppThreads; i++ ) - { - // just go and get a job - for( int y = 0; y <= lastLine; y++ ) - { - if( getNextCtu( pos, m_firstNonFinishedLine+y, 1 )) - { - m_SyncObjs[pos.y]->set(pos.x, pos.y); - m_numWppThreadsRunning++; - break; - } - } - } 
-} - - -bool Scheduler::getNextCtu( Position& pos, int ctuLine, int offset) -{ - int x = m_LineDone[ctuLine] + 1; - if( ! m_LineProc[ctuLine] ) - { - int maxXOffset = x+offset >= m_ctuXsize ? m_ctuXsize-1 : x+offset; - if( (ctuLine == 0 || m_LineDone[ctuLine-1]>=maxXOffset) && (x==0 || m_LineDone[ctuLine]>=+x-1)) - { - m_LineProc[ctuLine] = true; - pos.x = x; pos.y = ctuLine; - return true; - } - } - return false; -} - -#endif #endif @@ -436,288 +168,10 @@ bool Scheduler::getNextCtu( Position& pos, int ctuLine, int offset) // picture methods // --------------------------------------------------------------------------- -#if HEVC_TILES_WPP - -Tile::Tile() -: m_tileWidthInCtus (0) -, m_tileHeightInCtus (0) -, m_rightEdgePosInCtus (0) -, m_bottomEdgePosInCtus (0) -, m_firstCtuRsAddr (0) -{ -} - -Tile::~Tile() -{ -} - - -TileMap::TileMap() - : pcv(nullptr) - , tiles(0) - , numTiles(0) - , numTileColumns(0) - , numTileRows(0) - , tileIdxMap(nullptr) - , ctuTsToRsAddrMap(nullptr) - , ctuRsToTsAddrMap(nullptr) -{ -} - -void TileMap::create( const SPS& sps, const PPS& pps ) -{ - pcv = pps.pcv; - - numTileColumns = pps.getNumTileColumnsMinus1() + 1; - numTileRows = pps.getNumTileRowsMinus1() + 1; - numTiles = numTileColumns * numTileRows; - tiles.resize( numTiles ); - - const uint32_t numCtusInFrame = pcv->sizeInCtus; - tileIdxMap = new uint32_t[numCtusInFrame]; - ctuTsToRsAddrMap = new uint32_t[numCtusInFrame+1]; - ctuRsToTsAddrMap = new uint32_t[numCtusInFrame+1]; - - initTileMap( sps, pps ); - initCtuTsRsAddrMap(); -} - -void TileMap::destroy() -{ - tiles.clear(); - - if ( tileIdxMap ) - { - delete[] tileIdxMap; - tileIdxMap = nullptr; - } - - if ( ctuTsToRsAddrMap ) - { - delete[] ctuTsToRsAddrMap; - ctuTsToRsAddrMap = nullptr; - } - - if ( ctuRsToTsAddrMap ) - { - delete[] ctuRsToTsAddrMap; - ctuRsToTsAddrMap = nullptr; - } -} - -void TileMap::initTileMap( const SPS& sps, const PPS& pps ) -{ - const uint32_t frameWidthInCtus = pcv->widthInCtus; - const uint32_t 
frameHeightInCtus = pcv->heightInCtus; - - if( pps.getTileUniformSpacingFlag() ) - { - //set width and height for each (uniform) tile - for(int row=0; row < numTileRows; row++) - { - for(int col=0; col < numTileColumns; col++) - { - const int tileIdx = row * numTileColumns + col; - tiles[tileIdx].setTileWidthInCtus( (col+1)*frameWidthInCtus/numTileColumns - (col*frameWidthInCtus)/numTileColumns ); - tiles[tileIdx].setTileHeightInCtus( (row+1)*frameHeightInCtus/numTileRows - (row*frameHeightInCtus)/numTileRows ); - } - } - } - else - { - //set the width for each tile - for(int row=0; row < numTileRows; row++) - { - int cumulativeTileWidth = 0; - for(int col=0; col < numTileColumns - 1; col++) - { - tiles[row * numTileColumns + col].setTileWidthInCtus( pps.getTileColumnWidth(col) ); - cumulativeTileWidth += pps.getTileColumnWidth(col); - } - tiles[row * numTileColumns + numTileColumns - 1].setTileWidthInCtus( frameWidthInCtus-cumulativeTileWidth ); - } - - //set the height for each tile - for(int col=0; col < numTileColumns; col++) - { - int cumulativeTileHeight = 0; - for(int row=0; row < numTileRows - 1; row++) - { - tiles[row * numTileColumns + col].setTileHeightInCtus( pps.getTileRowHeight(row) ); - cumulativeTileHeight += pps.getTileRowHeight(row); - } - tiles[(numTileRows - 1) * numTileColumns + col].setTileHeightInCtus( frameHeightInCtus-cumulativeTileHeight ); - } - } - - // Tile size check - int minWidth = 1; - int minHeight = 1; -#if !JVET_M0101_HLS - const int profileIdc = sps.getPTL()->getGeneralPTL()->getProfileIdc(); -#else - const int profileIdc = sps.getProfileTierLevel()->getProfileIdc(); -#endif - if ( profileIdc == Profile::MAIN || profileIdc == Profile::MAIN10) - { - if (pps.getTilesEnabledFlag()) - { - minHeight = 64 / sps.getMaxCUHeight(); - minWidth = 256 / sps.getMaxCUWidth(); - } - } - for(int row=0; row < numTileRows; row++) - { - for(int col=0; col < numTileColumns; col++) - { - const int tileIdx = row * numTileColumns + col; - 
if(tiles[tileIdx].getTileWidthInCtus() < minWidth) { THROW("Invalid tile size"); } - if(tiles[tileIdx].getTileHeightInCtus() < minHeight) { THROW("Invalid tile size"); } - } - } - - //initialize each tile of the current picture - for( int row=0; row < numTileRows; row++ ) - { - for( int col=0; col < numTileColumns; col++ ) - { - const int tileIdx = row * numTileColumns + col; - - //initialize the RightEdgePosInCU for each tile - int rightEdgePosInCTU = 0; - for( int i=0; i <= col; i++ ) - { - rightEdgePosInCTU += tiles[row * numTileColumns + i].getTileWidthInCtus(); - } - tiles[tileIdx].setRightEdgePosInCtus(rightEdgePosInCTU-1); - - //initialize the BottomEdgePosInCU for each tile - int bottomEdgePosInCTU = 0; - for( int i=0; i <= row; i++ ) - { - bottomEdgePosInCTU += tiles[i * numTileColumns + col].getTileHeightInCtus(); - } - tiles[tileIdx].setBottomEdgePosInCtus(bottomEdgePosInCTU-1); - - //initialize the FirstCUAddr for each tile - tiles[tileIdx].setFirstCtuRsAddr( (tiles[tileIdx].getBottomEdgePosInCtus() - tiles[tileIdx].getTileHeightInCtus() + 1) * frameWidthInCtus + - tiles[tileIdx].getRightEdgePosInCtus() - tiles[tileIdx].getTileWidthInCtus() + 1); - } - } - - int columnIdx = 0; - int rowIdx = 0; - - //initialize the TileIdxMap - const uint32_t numCtusInFrame = pcv->sizeInCtus; - for( int i=0; i<numCtusInFrame; i++) - { - for( int col=0; col < numTileColumns; col++) - { - if(i % frameWidthInCtus <= tiles[col].getRightEdgePosInCtus()) - { - columnIdx = col; - break; - } - } - for(int row=0; row < numTileRows; row++) - { - if(i / frameWidthInCtus <= tiles[row*numTileColumns].getBottomEdgePosInCtus()) - { - rowIdx = row; - break; - } - } - tileIdxMap[i] = rowIdx * numTileColumns + columnIdx; - } -} - -void TileMap::initCtuTsRsAddrMap() -{ - //generate the Coding Order Map and Inverse Coding Order Map - const uint32_t numCtusInFrame = pcv->sizeInCtus; - for(int ctuTsAddr=0, ctuRsAddr=0; ctuTsAddr<numCtusInFrame; ctuTsAddr++, ctuRsAddr = 
calculateNextCtuRSAddr(ctuRsAddr)) - { - ctuTsToRsAddrMap[ctuTsAddr] = ctuRsAddr; - ctuRsToTsAddrMap[ctuRsAddr] = ctuTsAddr; - } - ctuTsToRsAddrMap[numCtusInFrame] = numCtusInFrame; - ctuRsToTsAddrMap[numCtusInFrame] = numCtusInFrame; -} - -uint32_t TileMap::calculateNextCtuRSAddr( const uint32_t currCtuRsAddr ) const -{ - const uint32_t frameWidthInCtus = pcv->widthInCtus; - uint32_t nextCtuRsAddr; - - //get the tile index for the current CTU - const uint32_t uiTileIdx = getTileIdxMap(currCtuRsAddr); - //get the raster scan address for the next CTU - if( currCtuRsAddr % frameWidthInCtus == tiles[uiTileIdx].getRightEdgePosInCtus() && currCtuRsAddr / frameWidthInCtus == tiles[uiTileIdx].getBottomEdgePosInCtus() ) - //the current CTU is the last CTU of the tile - { - if(uiTileIdx+1 == numTiles) - { - nextCtuRsAddr = pcv->sizeInCtus; - } - else - { - nextCtuRsAddr = tiles[uiTileIdx+1].getFirstCtuRsAddr(); - } - } - else //the current CTU is not the last CTU of the tile - { - if( currCtuRsAddr % frameWidthInCtus == tiles[uiTileIdx].getRightEdgePosInCtus() ) //the current CTU is on the rightmost edge of the tile - { - nextCtuRsAddr = currCtuRsAddr + frameWidthInCtus - tiles[uiTileIdx].getTileWidthInCtus() + 1; - } - else - { - nextCtuRsAddr = currCtuRsAddr + 1; - } - } - - return nextCtuRsAddr; -} - -uint32_t TileMap::getSubstreamForCtuAddr(const uint32_t ctuAddr, const bool bAddressInRaster, Slice *pcSlice) const -{ - const bool bWPPEnabled = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag(); - uint32_t subStrm; - - if( (bWPPEnabled && pcv->heightInCtus > 1) || (numTiles > 1) ) // wavefronts, and possibly tiles being used. - { - const uint32_t ctuRsAddr = bAddressInRaster ? 
ctuAddr : getCtuTsToRsAddrMap(ctuAddr); - const uint32_t tileIndex = getTileIdxMap(ctuRsAddr); - - if (bWPPEnabled) - { - const uint32_t firstCtuRsAddrOfTile = tiles[tileIndex].getFirstCtuRsAddr(); - const uint32_t tileYInCtus = firstCtuRsAddrOfTile / pcv->widthInCtus; - const uint32_t ctuLine = ctuRsAddr / pcv->widthInCtus; - const uint32_t startingSubstreamForTile = (tileYInCtus * numTileColumns) + (tiles[tileIndex].getTileHeightInCtus() * (tileIndex % numTileColumns)); - - subStrm = startingSubstreamForTile + (ctuLine - tileYInCtus); - } - else - { - subStrm = tileIndex; - } - } - else - { - subStrm = 0; - } - return subStrm; -} -#endif Picture::Picture() { -#if HEVC_TILES_WPP - tileMap = nullptr; -#endif cs = nullptr; m_bIsBorderExtended = false; usedByCurr = false; @@ -728,20 +182,24 @@ Picture::Picture() layer = std::numeric_limits<uint32_t>::max(); fieldPic = false; topField = false; + precedingDRAP = false; for( int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ ) { m_prevQP[i] = -1; } m_spliceIdx = NULL; m_ctuNums = 0; + layerId = NOT_VALID; } -void Picture::create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder) +void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder, const int _layerId ) { + layerId = _layerId; UnitArea::operator=( UnitArea( _chromaFormat, Area( Position{ 0, 0 }, size ) ) ); - margin = _margin; + margin = MAX_SCALING_RATIO*_margin; const Area a = Area( Position(), size ); - M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, _margin, MEMORY_ALIGN_DEF_SIZE ); + M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE ); + M_BUFS( 0, PIC_RECON_WRAP ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE ); if( !_decoder ) { @@ -758,11 +216,7 @@ void Picture::create(const ChromaFormat &_chromaFormat, const Size 
&size, const void Picture::destroy() { #if ENABLE_SPLIT_PARALLELISM -#if ENABLE_WPP_PARALLELISM - for( int jId = 0; jId < ( PARL_SPLIT_MAX_NUM_THREADS * PARL_WPP_MAX_NUM_THREADS ); jId++ ) -#else for( int jId = 0; jId < PARL_SPLIT_MAX_NUM_THREADS; jId++ ) -#endif #endif for (uint32_t t = 0; t < NUM_PIC_TYPES; t++) { @@ -788,14 +242,6 @@ void Picture::destroy() } SEIs.clear(); -#if HEVC_TILES_WPP - if ( tileMap ) - { - tileMap->destroy(); - delete tileMap; - tileMap = nullptr; - } -#endif if (m_spliceIdx) { delete[] m_spliceIdx; @@ -868,16 +314,16 @@ const CPelBuf Picture::getResiBuf(const CompArea &blk) const { return getBu PelUnitBuf Picture::getResiBuf(const UnitArea &unit) { return getBuf(unit, PIC_RESIDUAL); } const CPelUnitBuf Picture::getResiBuf(const UnitArea &unit) const { return getBuf(unit, PIC_RESIDUAL); } - PelBuf Picture::getRecoBuf(const ComponentID compID) { return getBuf(compID, PIC_RECONSTRUCTION); } -const CPelBuf Picture::getRecoBuf(const ComponentID compID) const { return getBuf(compID, PIC_RECONSTRUCTION); } - PelBuf Picture::getRecoBuf(const CompArea &blk) { return getBuf(blk, PIC_RECONSTRUCTION); } -const CPelBuf Picture::getRecoBuf(const CompArea &blk) const { return getBuf(blk, PIC_RECONSTRUCTION); } - PelUnitBuf Picture::getRecoBuf(const UnitArea &unit) { return getBuf(unit, PIC_RECONSTRUCTION); } -const CPelUnitBuf Picture::getRecoBuf(const UnitArea &unit) const { return getBuf(unit, PIC_RECONSTRUCTION); } - PelUnitBuf Picture::getRecoBuf() { return M_BUFS(scheduler.getSplitPicId(), PIC_RECONSTRUCTION); } -const CPelUnitBuf Picture::getRecoBuf() const { return M_BUFS(scheduler.getSplitPicId(), PIC_RECONSTRUCTION); } + PelBuf Picture::getRecoBuf(const ComponentID compID, bool wrap) { return getBuf(compID, wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } +const CPelBuf Picture::getRecoBuf(const ComponentID compID, bool wrap) const { return getBuf(compID, wrap ? 
PIC_RECON_WRAP : PIC_RECONSTRUCTION); } + PelBuf Picture::getRecoBuf(const CompArea &blk, bool wrap) { return getBuf(blk, wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } +const CPelBuf Picture::getRecoBuf(const CompArea &blk, bool wrap) const { return getBuf(blk, wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } + PelUnitBuf Picture::getRecoBuf(const UnitArea &unit, bool wrap) { return getBuf(unit, wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } +const CPelUnitBuf Picture::getRecoBuf(const UnitArea &unit, bool wrap) const { return getBuf(unit, wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } + PelUnitBuf Picture::getRecoBuf(bool wrap) { return M_BUFS(scheduler.getSplitPicId(), wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } +const CPelUnitBuf Picture::getRecoBuf(bool wrap) const { return M_BUFS(scheduler.getSplitPicId(), wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } -void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps) +void Picture::finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHeader *picHeader, APS** alfApss, APS* lmcsAps, APS* scalingListAps ) { for( auto &sei : SEIs ) { @@ -886,18 +332,10 @@ void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps) SEIs.clear(); clearSliceBuffer(); -#if HEVC_TILES_WPP - if( tileMap ) - { - tileMap->destroy(); - delete tileMap; - tileMap = nullptr; - } -#endif const ChromaFormat chromaFormatIDC = sps.getChromaFormatIdc(); - const int iWidth = sps.getPicWidthInLumaSamples(); - const int iHeight = sps.getPicHeightInLumaSamples(); + const int iWidth = pps.getPicWidthInLumaSamples(); + const int iHeight = pps.getPicHeightInLumaSamples(); if( cs ) { @@ -907,22 +345,23 @@ void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps) { cs = new CodingStructure( g_globalUnitCache.cuCache, g_globalUnitCache.puCache, g_globalUnitCache.tuCache ); cs->sps = &sps; - cs->create( chromaFormatIDC, Area( 0, 0, iWidth, iHeight ), true ); + cs->create(chromaFormatIDC, Area(0, 0, iWidth, iHeight), true, 
(bool)sps.getPLTMode()); } + cs->vps = vps; cs->picture = this; cs->slice = nullptr; // the slices for this picture have not been set at this point. update cs->slice after swapSliceObject() cs->pps = &pps; - cs->aps = &aps; -#if HEVC_VPS - cs->vps = nullptr; -#endif + picHeader->setSPSId( sps.getSPSId() ); + picHeader->setPPSId( pps.getPPSId() ); + cs->picHeader = picHeader; + memcpy(cs->alfApss, alfApss, sizeof(cs->alfApss)); + cs->lmcsAps = lmcsAps; + cs->scalinglistAps = scalingListAps; cs->pcv = pps.pcv; + m_conformanceWindow = pps.getConformanceWindow(); + m_scalingWindow = pps.getScalingWindow(); -#if HEVC_TILES_WPP - tileMap = new TileMap; - tileMap->create( sps, pps ); -#endif if (m_spliceIdx == NULL) { m_ctuNums = cs->pcv->sizeInCtus; @@ -935,10 +374,12 @@ void Picture::allocateNewSlice() { slices.push_back(new Slice); Slice& slice = *slices.back(); + memcpy(slice.getAlfAPSs(), cs->alfApss, sizeof(cs->alfApss)); + - slice.setAPS(cs->aps); slice.setPPS( cs->pps); slice.setSPS( cs->sps); + slice.setVPS( cs->vps); if(slices.size()>=2) { slice.copySliceInfo( slices[slices.size()-2] ); @@ -950,13 +391,17 @@ Slice *Picture::swapSliceObject(Slice * p, uint32_t i) { p->setSPS(cs->sps); p->setPPS(cs->pps); - p->setAPS(cs->aps); + p->setVPS(cs->vps); + p->setAlfAPSs(cs->alfApss); + Slice * pTmp = slices[i]; slices[i] = p; pTmp->setSPS(0); pTmp->setPPS(0); - pTmp->setAPS(0); + pTmp->setVPS(0); + memset(pTmp->getAlfAPSs(), 0, sizeof(*pTmp->getAlfAPSs())*ALF_CTB_MAX_NUM_APS); + return pTmp; } @@ -986,22 +431,308 @@ void Picture::finishParallelPart( const UnitArea& area ) } } -#if ENABLE_WPP_PARALLELISM -void Picture::finishCtuPart( const UnitArea& ctuArea ) + +#endif + +const TFilterCoeff DownsamplingFilterSRC[8][16][12] = +{ + { // D = 1 + { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -6, 127, 7, -2, 0, 0, 0, 0 }, + { 0, 0, 0, 3, -12, 125, 16, -5, 1, 0, 0, 0 }, + { 0, 0, 0, 4, -16, 120, 26, -7, 1, 0, 0, 0 }, + { 0, 0, 0, 5, -18, 114, 36, -10, 1, 0, 0, 0 }, 
+ { 0, 0, 0, 5, -20, 107, 46, -12, 2, 0, 0, 0 }, + { 0, 0, 0, 5, -21, 99, 57, -15, 3, 0, 0, 0 }, + { 0, 0, 0, 5, -20, 89, 68, -18, 4, 0, 0, 0 }, + { 0, 0, 0, 4, -19, 79, 79, -19, 4, 0, 0, 0 }, + { 0, 0, 0, 4, -18, 68, 89, -20, 5, 0, 0, 0 }, + { 0, 0, 0, 3, -15, 57, 99, -21, 5, 0, 0, 0 }, + { 0, 0, 0, 2, -12, 46, 107, -20, 5, 0, 0, 0 }, + { 0, 0, 0, 1, -10, 36, 114, -18, 5, 0, 0, 0 }, + { 0, 0, 0, 1, -7, 26, 120, -16, 4, 0, 0, 0 }, + { 0, 0, 0, 1, -5, 16, 125, -12, 3, 0, 0, 0 }, + { 0, 0, 0, 0, -2, 7, 127, -6, 2, 0, 0, 0 } + }, + { // D = 1.5 + { 0, 2, 0, -14, 33, 86, 33, -14, 0, 2, 0, 0 }, + { 0, 1, 1, -14, 29, 85, 38, -13, -1, 2, 0, 0 }, + { 0, 1, 2, -14, 24, 84, 43, -12, -2, 2, 0, 0 }, + { 0, 1, 2, -13, 19, 83, 48, -11, -3, 2, 0, 0 }, + { 0, 0, 3, -13, 15, 81, 53, -10, -4, 3, 0, 0 }, + { 0, 0, 3, -12, 11, 79, 57, -8, -5, 3, 0, 0 }, + { 0, 0, 3, -11, 7, 76, 62, -5, -7, 3, 0, 0 }, + { 0, 0, 3, -10, 3, 73, 65, -2, -7, 3, 0, 0 }, + { 0, 0, 3, -9, 0, 70, 70, 0, -9, 3, 0, 0 }, + { 0, 0, 3, -7, -2, 65, 73, 3, -10, 3, 0, 0 }, + { 0, 0, 3, -7, -5, 62, 76, 7, -11, 3, 0, 0 }, + { 0, 0, 3, -5, -8, 57, 79, 11, -12, 3, 0, 0 }, + { 0, 0, 3, -4, -10, 53, 81, 15, -13, 3, 0, 0 }, + { 0, 0, 2, -3, -11, 48, 83, 19, -13, 2, 1, 0 }, + { 0, 0, 2, -2, -12, 43, 84, 24, -14, 2, 1, 0 }, + { 0, 0, 2, -1, -13, 38, 85, 29, -14, 1, 1, 0 } + }, + { // D = 2 + { 0, 5, -6, -10, 37, 76, 37, -10, -6, 5, 0, 0}, //0 + { 0, 5, -4, -11, 33, 76, 40, -9, -7, 5, 0, 0}, //1 + //{ 0, 5, -3, -12, 28, 75, 44, -7, -8, 5, 1, 0}, //2 + { -1, 5, -3, -12, 29, 75, 45, -7, -8, 5, 0, 0}, //2 new coefficients in m24499 + { -1, 4, -2, -13, 25, 75, 48, -5, -9, 5, 1, 0}, //3 + { -1, 4, -1, -13, 22, 73, 52, -3, -10, 4, 1, 0}, //4 + { -1, 4, 0, -13, 18, 72, 55, -1, -11, 4, 2, -1}, //5 + { -1, 4, 1, -13, 14, 70, 59, 2, -12, 3, 2, -1}, //6 + { -1, 3, 1, -13, 11, 68, 62, 5, -12, 3, 2, -1}, //7 + { -1, 3, 2, -13, 8, 65, 65, 8, -13, 2, 3, -1}, //8 + { -1, 2, 3, -12, 5, 62, 68, 11, -13, 1, 3, -1}, //9 + { -1, 2, 3, -12, 2, 59, 
70, 14, -13, 1, 4, -1}, //10 + { -1, 2, 4, -11, -1, 55, 72, 18, -13, 0, 4, -1}, //11 + { 0, 1, 4, -10, -3, 52, 73, 22, -13, -1, 4, -1}, //12 + { 0, 1, 5, -9, -5, 48, 75, 25, -13, -2, 4, -1}, //13 + //{ 0, 1, 5, -8, -7, 44, 75, 28, -12, -3, 5, 0}, //14 + { 0, 0, 5, -8, -7, 45, 75, 29, -12, -3, 5, -1} , //14 new coefficients in m24499 + { 0, 0, 5, -7, -9, 40, 76, 33, -11, -4, 5, 0}, //15 + }, + { // D = 2.5 + { 2, -3, -9, 6, 39, 58, 39, 6, -9, -3, 2, 0}, // 0 + { 2, -3, -9, 4, 38, 58, 43, 7, -9, -4, 1, 0}, // 1 + { 2, -2, -9, 2, 35, 58, 44, 9, -8, -4, 1, 0}, // 2 + { 1, -2, -9, 1, 34, 58, 46, 11, -8, -5, 1, 0}, // 3 + //{ 1, -1, -8, -1, 31, 57, 48, 13, -8, -5, 1, 0}, // 4 + { 1, -1, -8, -1, 31, 57, 47, 13, -7, -5, 1, 0}, // 4 new coefficients in m24499 + { 1, -1, -8, -2, 29, 56, 49, 15, -7, -6, 1, 1}, // 5 + { 1, 0, -8, -3, 26, 55, 51, 17, -7, -6, 1, 1}, // 6 + { 1, 0, -7, -4, 24, 54, 52, 19, -6, -7, 1, 1}, // 7 + { 1, 0, -7, -5, 22, 53, 53, 22, -5, -7, 0, 1}, // 8 + { 1, 1, -7, -6, 19, 52, 54, 24, -4, -7, 0, 1}, // 9 + { 1, 1, -6, -7, 17, 51, 55, 26, -3, -8, 0, 1}, // 10 + { 1, 1, -6, -7, 15, 49, 56, 29, -2, -8, -1, 1}, // 11 + //{ 0, 1, -5, -8, 13, 48, 57, 31, -1, -8, -1, 1}, // 12 new coefficients in m24499 + { 0, 1, -5, -7, 13, 47, 57, 31, -1, -8, -1, 1}, // 12 + { 0, 1, -5, -8, 11, 46, 58, 34, 1, -9, -2, 1}, // 13 + { 0, 1, -4, -8, 9, 44, 58, 35, 2, -9, -2, 2}, // 14 + { 0, 1, -4, -9, 7, 43, 58, 38, 4, -9, -3, 2}, // 15 + }, + { // D = 3 + { -2, -7, 0, 17, 35, 43, 35, 17, 0, -7, -5, 2 }, + { -2, -7, -1, 16, 34, 43, 36, 18, 1, -7, -5, 2 }, + { -1, -7, -1, 14, 33, 43, 36, 19, 1, -6, -5, 2 }, + { -1, -7, -2, 13, 32, 42, 37, 20, 3, -6, -5, 2 }, + { 0, -7, -3, 12, 31, 42, 38, 21, 3, -6, -5, 2 }, + { 0, -7, -3, 11, 30, 42, 39, 23, 4, -6, -6, 1 }, + { 0, -7, -4, 10, 29, 42, 40, 24, 5, -6, -6, 1 }, + { 1, -7, -4, 9, 27, 41, 40, 25, 6, -5, -6, 1 }, + { 1, -6, -5, 7, 26, 41, 41, 26, 7, -5, -6, 1 }, + { 1, -6, -5, 6, 25, 40, 41, 27, 9, -4, -7, 1 }, + { 1, -6, -6, 5, 24, 
40, 42, 29, 10, -4, -7, 0 }, + { 1, -6, -6, 4, 23, 39, 42, 30, 11, -3, -7, 0 }, + { 2, -5, -6, 3, 21, 38, 42, 31, 12, -3, -7, 0 }, + { 2, -5, -6, 3, 20, 37, 42, 32, 13, -2, -7, -1 }, + { 2, -5, -6, 1, 19, 36, 43, 33, 14, -1, -7, -1 }, + { 2, -5, -7, 1, 18, 36, 43, 34, 16, -1, -7, -2 } + }, + { // D = 3.5 + { -6, -3, 5, 19, 31, 36, 31, 19, 5, -3, -6, 0 }, + { -6, -4, 4, 18, 31, 37, 32, 20, 6, -3, -6, -1 }, + { -6, -4, 4, 17, 30, 36, 33, 21, 7, -3, -6, -1 }, + { -5, -5, 3, 16, 30, 36, 33, 22, 8, -2, -6, -2 }, + { -5, -5, 2, 15, 29, 36, 34, 23, 9, -2, -6, -2 }, + { -5, -5, 2, 15, 28, 36, 34, 24, 10, -2, -6, -3 }, + { -4, -5, 1, 14, 27, 36, 35, 24, 10, -1, -6, -3 }, + { -4, -5, 0, 13, 26, 35, 35, 25, 11, 0, -5, -3 }, + { -4, -6, 0, 12, 26, 36, 36, 26, 12, 0, -6, -4 }, + { -3, -5, 0, 11, 25, 35, 35, 26, 13, 0, -5, -4 }, + { -3, -6, -1, 10, 24, 35, 36, 27, 14, 1, -5, -4 }, + { -3, -6, -2, 10, 24, 34, 36, 28, 15, 2, -5, -5 }, + { -2, -6, -2, 9, 23, 34, 36, 29, 15, 2, -5, -5 }, + { -2, -6, -2, 8, 22, 33, 36, 30, 16, 3, -5, -5 }, + { -1, -6, -3, 7, 21, 33, 36, 30, 17, 4, -4, -6 }, + { -1, -6, -3, 6, 20, 32, 37, 31, 18, 4, -4, -6 } + }, + { // D = 4 + { -9, 0, 9, 20, 28, 32, 28, 20, 9, 0, -9, 0 }, + { -9, 0, 8, 19, 28, 32, 29, 20, 10, 0, -4, -5 }, + { -9, -1, 8, 18, 28, 32, 29, 21, 10, 1, -4, -5 }, + { -9, -1, 7, 18, 27, 32, 30, 22, 11, 1, -4, -6 }, + { -8, -2, 6, 17, 27, 32, 30, 22, 12, 2, -4, -6 }, + { -8, -2, 6, 16, 26, 32, 31, 23, 12, 2, -4, -6 }, + { -8, -2, 5, 16, 26, 31, 31, 23, 13, 3, -3, -7 }, + { -8, -3, 5, 15, 25, 31, 31, 24, 14, 4, -3, -7 }, + { -7, -3, 4, 14, 25, 31, 31, 25, 14, 4, -3, -7 }, + { -7, -3, 4, 14, 24, 31, 31, 25, 15, 5, -3, -8 }, + { -7, -3, 3, 13, 23, 31, 31, 26, 16, 5, -2, -8 }, + { -6, -4, 2, 12, 23, 31, 32, 26, 16, 6, -2, -8 }, + { -6, -4, 2, 12, 22, 30, 32, 27, 17, 6, -2, -8 }, + { -6, -4, 1, 11, 22, 30, 32, 27, 18, 7, -1, -9 }, + { -5, -4, 1, 10, 21, 29, 32, 28, 18, 8, -1, -9 }, + { -5, -4, 0, 10, 20, 29, 32, 28, 19, 8, 0, -9 } + }, + { // D = 
5.5 + { -8, 7, 13, 18, 22, 24, 22, 18, 13, 7, 2, -10 }, + { -8, 7, 13, 18, 22, 23, 22, 19, 13, 7, 2, -10 }, + { -8, 6, 12, 18, 22, 23, 22, 19, 14, 8, 2, -10 }, + { -9, 6, 12, 17, 22, 23, 23, 19, 14, 8, 3, -10 }, + { -9, 6, 12, 17, 21, 23, 23, 19, 14, 9, 3, -10 }, + { -9, 5, 11, 17, 21, 23, 23, 20, 15, 9, 3, -10 }, + { -9, 5, 11, 16, 21, 23, 23, 20, 15, 9, 4, -10 }, + { -9, 5, 10, 16, 21, 23, 23, 20, 15, 10, 4, -10 }, + { -10, 5, 10, 16, 20, 23, 23, 20, 16, 10, 5, -10 }, + { -10, 4, 10, 15, 20, 23, 23, 21, 16, 10, 5, -9 }, + { -10, 4, 9, 15, 20, 23, 23, 21, 16, 11, 5, -9 }, + { -10, 3, 9, 15, 20, 23, 23, 21, 17, 11, 5, -9 }, + { -10, 3, 9, 14, 19, 23, 23, 21, 17, 12, 6, -9 }, + { -10, 3, 8, 14, 19, 23, 23, 22, 17, 12, 6, -9 }, + { -10, 2, 8, 14, 19, 22, 23, 22, 18, 12, 6, -8 }, + { -10, 2, 7, 13, 19, 22, 23, 22, 18, 13, 7, -8 } + } +}; + +void Picture::sampleRateConv( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale, + const CPelBuf& beforeScale, const int beforeScaleLeftOffset, const int beforeScaleTopOffset, + const PelBuf& afterScale, const int afterScaleLeftOffset, const int afterScaleTopOffset, + const int bitDepth, const bool useLumaFilter, const bool downsampling, + const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag ) { - const UnitArea clipdArea = clipArea( ctuArea, *this ); - const int sourceID = scheduler.getSplitPicId( 0 ); - // distribute the reconstruction across all of the parallel workers - for( int dataId = 0; dataId < scheduler.getNumPicInstances(); dataId++ ) + const Pel* orgSrc = beforeScale.buf; + const int orgWidth = beforeScale.width; + const int orgHeight = beforeScale.height; + const int orgStride = beforeScale.stride; + + Pel* scaledSrc = afterScale.buf; + const int scaledWidth = afterScale.width; + const int scaledHeight = afterScale.height; + const int scaledStride = afterScale.stride; + + if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && 
!beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset ) { - if( dataId == sourceID ) continue; + for( int j = 0; j < orgHeight; j++ ) + { + memcpy( scaledSrc + j * scaledStride, orgSrc + j * orgStride, sizeof( Pel ) * orgWidth ); + } + + return; + } + + const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0]; + const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0]; + const int numFracPositions = useLumaFilter ? 15 : 31; + const int numFracShift = useLumaFilter ? 4 : 5; + const int posShiftX = SCALE_RATIO_BITS - numFracShift + compScale.first; + const int posShiftY = SCALE_RATIO_BITS - numFracShift + compScale.second; + int addX = ( 1 << ( posShiftX - 1 ) ) + ( beforeScaleLeftOffset << SCALE_RATIO_BITS ) + ( ( int( 1 - horCollocatedPositionFlag ) * 8 * ( scalingRatio.first - SCALE_1X.first ) + ( 1 << ( 2 + compScale.first ) ) ) >> ( 3 + compScale.first ) ); + int addY = ( 1 << ( posShiftY - 1 ) ) + ( beforeScaleTopOffset << SCALE_RATIO_BITS ) + ( ( int( 1 - verCollocatedPositionFlag ) * 8 * ( scalingRatio.second - SCALE_1X.second ) + ( 1 << ( 2 + compScale.second ) ) ) >> ( 3 + compScale.second ) ); + + if( downsampling ) + { + int verFilter = 0; + int horFilter = 0; + + if( scalingRatio.first > ( 15 << SCALE_RATIO_BITS ) / 4 ) horFilter = 7; + else if( scalingRatio.first > ( 20 << SCALE_RATIO_BITS ) / 7 ) horFilter = 6; + else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 2 ) horFilter = 5; + else if( scalingRatio.first > ( 2 << SCALE_RATIO_BITS ) ) horFilter = 4; + else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 3 ) horFilter = 3; + else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 4 ) horFilter = 2; + else if( scalingRatio.first > ( 20 << SCALE_RATIO_BITS ) / 19 ) horFilter = 1; + + if( scalingRatio.second > ( 15 << SCALE_RATIO_BITS ) / 4 ) 
verFilter = 7; + else if( scalingRatio.second > ( 20 << SCALE_RATIO_BITS ) / 7 ) verFilter = 6; + else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 2 ) verFilter = 5; + else if( scalingRatio.second > ( 2 << SCALE_RATIO_BITS ) ) verFilter = 4; + else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 3 ) verFilter = 3; + else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 4 ) verFilter = 2; + else if( scalingRatio.second > ( 20 << SCALE_RATIO_BITS ) / 19 ) verFilter = 1; + + filterHor = &DownsamplingFilterSRC[horFilter][0][0]; + filterVer = &DownsamplingFilterSRC[verFilter][0][0]; + } + + const int filterLength = downsampling ? 12 : ( useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA ); + const int log2Norm = downsampling ? 14 : 12; + + int *buf = new int[orgHeight * scaledWidth]; + int maxVal = ( 1 << bitDepth ) - 1; + + CHECK( bitDepth > 17, "Overflow may happen!" ); + + for( int i = 0; i < scaledWidth; i++ ) + { + const Pel* org = orgSrc; + int refPos = ( ( ( i << compScale.first ) - afterScaleLeftOffset ) * scalingRatio.first + addX ) >> posShiftX; + int integer = refPos >> numFracShift; + int frac = refPos & numFracPositions; + int* tmp = buf + i; + + for( int j = 0; j < orgHeight; j++ ) + { + int sum = 0; + const TFilterCoeff* f = filterHor + frac * filterLength; + + for( int k = 0; k < filterLength; k++ ) + { + int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 ); + sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering + } + + *tmp = sum; + + tmp += scaledWidth; + org += orgStride; + } + } + + Pel* dst = scaledSrc; + + for( int j = 0; j < scaledHeight; j++ ) + { + int refPos = ( ( ( j << compScale.second ) - afterScaleTopOffset ) * scalingRatio.second + addY ) >> posShiftY; + int integer = refPos >> numFracShift; + int frac = refPos & numFracPositions; + + for( int i = 0; i < scaledWidth; i++ ) + { + int sum = 0; + int* tmp = buf + i; + const TFilterCoeff* f = 
filterVer + frac * filterLength; + + for( int k = 0; k < filterLength; k++ ) + { + int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 ); + sum += f[k] * tmp[yInt*scaledWidth]; + } + + dst[i] = std::min<int>( std::max( 0, ( sum + ( 1 << ( log2Norm - 1 ) ) ) >> log2Norm ), maxVal ); + } - M_BUFS( dataId, PIC_RECONSTRUCTION ).subBuf( clipdArea ).copyFrom( M_BUFS( sourceID, PIC_RECONSTRUCTION ).subBuf( clipdArea ) ); + dst += scaledStride; } + + delete[] buf; } -#endif -#endif +void Picture::rescalePicture( const std::pair<int, int> scalingRatio, + const CPelUnitBuf& beforeScaling, const Window& scalingWindowBefore, + const PelUnitBuf& afterScaling, const Window& scalingWindowAfter, + const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool useLumaFilter, const bool downsampling, + const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag ) +{ + for( int comp = 0; comp < ::getNumberValidComponents( chromaFormatIDC ); comp++ ) + { + ComponentID compID = ComponentID( comp ); + const CPelBuf& beforeScale = beforeScaling.get( compID ); + const PelBuf& afterScale = afterScaling.get( compID ); + + sampleRateConv( scalingRatio, std::pair<int, int>( ::getComponentScaleX( compID, chromaFormatIDC ), ::getComponentScaleY( compID, chromaFormatIDC ) ), +#if JVET_Q0487_SCALING_WINDOW_ISSUES + beforeScale, scalingWindowBefore.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), scalingWindowBefore.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + afterScale, scalingWindowAfter.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), scalingWindowAfter.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), +#else + beforeScale, scalingWindowBefore.getWindowLeftOffset(), scalingWindowBefore.getWindowTopOffset(), + afterScale, scalingWindowAfter.getWindowLeftOffset(), scalingWindowAfter.getWindowTopOffset(), +#endif + bitDepths.recon[comp], downsampling || useLumaFilter ? 
true : isLuma( compID ), downsampling, + isLuma( compID ) ? 1 : horCollocatedChromaFlag, isLuma( compID ) ? 1 : verCollocatedChromaFlag ); + } +} void Picture::extendPicBorder() { @@ -1020,21 +751,6 @@ void Picture::extendPicBorder() Pel* pi = piTxt; // do left and right margins - if (cs->sps->getWrapAroundEnabledFlag()) - { - int xoffset = cs->sps->getWrapAroundOffset() >> getComponentScaleX( compID, cs->area.chromaFormat ); - for (int y = 0; y < p.height; y++) - { - for (int x = 0; x < xmargin; x++ ) - { - pi[ -x - 1 ] = pi[ -x - 1 + xoffset ]; - pi[ p.width + x ] = pi[ p.width + x - xoffset ]; - } - pi += p.stride; - } - } - else - { for (int y = 0; y < p.height; y++) { for (int x = 0; x < xmargin; x++ ) @@ -1044,7 +760,6 @@ void Picture::extendPicBorder() } pi += p.stride; } - } // pi is now the (0,height) (bottom left of image within bigger picture pi -= (p.stride + xmargin); @@ -1061,6 +776,43 @@ void Picture::extendPicBorder() { ::memcpy( pi - (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin<<1)) ); } + + // reference picture with horizontal wrapped boundary + if (cs->sps->getWrapAroundEnabledFlag()) + { + p = M_BUFS( 0, PIC_RECON_WRAP ).get( compID ); + p.copyFrom(M_BUFS( 0, PIC_RECONSTRUCTION ).get( compID )); + piTxt = p.bufAt(0,0); + pi = piTxt; + int xoffset = cs->sps->getWrapAroundOffset() >> getComponentScaleX( compID, cs->area.chromaFormat ); + for (int y = 0; y < p.height; y++) + { + for (int x = 0; x < xmargin; x++ ) + { + if( x < xoffset ) + { + pi[ -x - 1 ] = pi[ -x - 1 + xoffset ]; + pi[ p.width + x ] = pi[ p.width + x - xoffset ]; + } + else + { + pi[ -x - 1 ] = pi[ 0 ]; + pi[ p.width + x ] = pi[ p.width - 1 ]; + } + } + pi += p.stride; + } + pi -= (p.stride + xmargin); + for (int y = 0; y < ymargin; y++ ) + { + ::memcpy( pi + (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin << 1))); + } + pi -= ((p.height-1) * p.stride); + for (int y = 0; y < ymargin; y++ ) + { + ::memcpy( pi - (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin<<1)) ); 
+ } + } } m_bIsBorderExtended = true; @@ -1068,12 +820,12 @@ void Picture::extendPicBorder() PelBuf Picture::getBuf( const ComponentID compID, const PictureType &type ) { - return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); + return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); } const CPelBuf Picture::getBuf( const ComponentID compID, const PictureType &type ) const { - return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); + return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); } PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) @@ -1084,8 +836,7 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) } #if ENABLE_SPLIT_PARALLELISM - const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(); - + const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 
0 : scheduler.getSplitPicId(); #endif #if !KEEP_PRED_AND_RESI_SIGNALS if( type == PIC_RESIDUAL || type == PIC_PREDICTION ) @@ -1181,8 +932,8 @@ bool Picture::getSpliceFull() void Picture::addPictureToHashMapForInter() { - int picWidth = slices[0]->getSPS()->getPicWidthInLumaSamples(); - int picHeight = slices[0]->getSPS()->getPicHeightInLumaSamples(); + int picWidth = slices[0]->getPPS()->getPicWidthInLumaSamples(); + int picHeight = slices[0]->getPPS()->getPicHeightInLumaSamples(); uint32_t* blockHashValues[2][2]; bool* bIsBlockSame[2][3]; @@ -1198,18 +949,11 @@ void Picture::addPictureToHashMapForInter() bIsBlockSame[i][j] = new bool[picWidth*picHeight]; } } - - m_hashMap.create(); + m_hashMap.create(picWidth, picHeight); m_hashMap.generateBlock2x2HashValue(getOrigBuf(), picWidth, picHeight, slices[0]->getSPS()->getBitDepths(), blockHashValues[0], bIsBlockSame[0]);//2x2 m_hashMap.generateBlockHashValue(picWidth, picHeight, 4, 4, blockHashValues[0], blockHashValues[1], bIsBlockSame[0], bIsBlockSame[1]);//4x4 m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[1], bIsBlockSame[1][2], picWidth, picHeight, 4, 4); - m_hashMap.generateRectangleHashValue(picWidth, picHeight, 8, 4, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//8x4 - m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 8, 4); - - m_hashMap.generateRectangleHashValue(picWidth, picHeight, 4, 8, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//4x8 - m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 4, 8); - m_hashMap.generateBlockHashValue(picWidth, picHeight, 8, 8, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//8x8 m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 8, 8); diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h index 
dd7ab22326f80a9f7a3776ea526542121a477b70..1c259541cde70d2bdf5b85ed760b00cb80b6dc97 100644 --- a/source/Lib/CommonLib/Picture.h +++ b/source/Lib/CommonLib/Picture.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,11 +50,7 @@ #include "MCTS.h" #include <deque> -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM -#if ENABLE_WPP_PARALLELISM -#include <mutex> -class SyncObj; -#endif +#if ENABLE_SPLIT_PARALLELISM #define CURR_THREAD_ID -1 @@ -73,35 +69,10 @@ public: void finishParallel(); void setSplitThreadId( const int tId = CURR_THREAD_ID ); unsigned getNumSplitThreads() const { return m_numSplitThreads; }; -#endif -#if ENABLE_WPP_PARALLELISM - unsigned getWppDataId ( int lId = CURR_THREAD_ID ) const; - unsigned getWppThreadId() const; - void setWppThreadId( const int tId = CURR_THREAD_ID ); #endif unsigned getDataId () const; bool init ( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads ); int getNumPicInstances() const; -#if ENABLE_WPP_PARALLELISM - void setReady ( const int ctuPosX, const int ctuPosY ); - void wait ( const int ctuPosX, const int ctuPosY ); - -private: - bool getNextCtu( Position& pos, int ctuLine, int offset ); - -private: - int m_firstNonFinishedLine; - int m_numWppThreads; - int m_numWppThreadsRunning; - int m_numWppDataInstances; - int m_ctuYsize; - int m_ctuXsize; - - std::vector<int> m_LineDone; - std::vector<bool> m_LineProc; - std::mutex m_mutex; - std::vector<SyncObj*> m_SyncObjs; -#endif #if ENABLE_SPLIT_PARALLELISM int m_numSplitThreads; @@ -115,60 +86,7 @@ class AQpLayer; typedef std::list<SEI*> SEIMessages; -#if HEVC_TILES_WPP -class Tile -{ -private: - uint32_t m_tileWidthInCtus; - uint32_t 
m_tileHeightInCtus; - uint32_t m_rightEdgePosInCtus; - uint32_t m_bottomEdgePosInCtus; - uint32_t m_firstCtuRsAddr; - -public: - Tile(); - virtual ~Tile(); - - void setTileWidthInCtus ( uint32_t i ) { m_tileWidthInCtus = i; } - uint32_t getTileWidthInCtus () const { return m_tileWidthInCtus; } - void setTileHeightInCtus ( uint32_t i ) { m_tileHeightInCtus = i; } - uint32_t getTileHeightInCtus () const { return m_tileHeightInCtus; } - void setRightEdgePosInCtus ( uint32_t i ) { m_rightEdgePosInCtus = i; } - uint32_t getRightEdgePosInCtus () const { return m_rightEdgePosInCtus; } - void setBottomEdgePosInCtus ( uint32_t i ) { m_bottomEdgePosInCtus = i; } - uint32_t getBottomEdgePosInCtus () const { return m_bottomEdgePosInCtus; } - void setFirstCtuRsAddr ( uint32_t i ) { m_firstCtuRsAddr = i; } - uint32_t getFirstCtuRsAddr () const { return m_firstCtuRsAddr; } -}; - - -struct TileMap -{ - TileMap(); - - void create( const SPS& sps, const PPS& pps ); - void destroy(); - uint32_t getTileIdxMap( uint32_t ctuRsAddr ) const { return *(tileIdxMap + ctuRsAddr); } - uint32_t getTileIdxMap( const Position& pos ) const { return getTileIdxMap( ( pos.x / pcv->maxCUWidth ) + ( pos.y / pcv->maxCUHeight ) * pcv->widthInCtus ); }; - uint32_t getCtuTsToRsAddrMap( uint32_t ctuTsAddr ) const { return *(ctuTsToRsAddrMap + (ctuTsAddr>=pcv->sizeInCtus ? pcv->sizeInCtus : ctuTsAddr)); } - uint32_t getCtuRsToTsAddrMap( uint32_t ctuRsAddr ) const { return *(ctuRsToTsAddrMap + (ctuRsAddr>=pcv->sizeInCtus ? 
pcv->sizeInCtus : ctuRsAddr)); } - uint32_t getSubstreamForCtuAddr(const uint32_t ctuAddr, const bool bAddressInRaster, Slice *pcSlice) const; - - const PreCalcValues* pcv; - std::vector<Tile> tiles; - uint32_t numTiles; - uint32_t numTileColumns; - uint32_t numTileRows; - uint32_t* tileIdxMap; - uint32_t* ctuTsToRsAddrMap; - uint32_t* ctuRsToTsAddrMap; - - void initTileMap( const SPS& sps, const PPS& pps ); - void initCtuTsRsAddrMap(); - uint32_t calculateNextCtuRSAddr( const uint32_t currCtuRsAddr ) const; -}; -#endif #if ENABLE_SPLIT_PARALLELISM #define M_BUFS(JID,PID) m_bufs[JID][PID] @@ -181,7 +99,7 @@ struct Picture : public UnitArea uint32_t margin; Picture(); - void create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned margin, const bool bDecoder); + void create( const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned margin, const bool bDecoder, const int layerId ); void destroy(); void createTempBuffers( const unsigned _maxCUSize ); @@ -210,14 +128,14 @@ struct Picture : public UnitArea PelUnitBuf getResiBuf(const UnitArea &unit); const CPelUnitBuf getResiBuf(const UnitArea &unit) const; - PelBuf getRecoBuf(const ComponentID compID); - const CPelBuf getRecoBuf(const ComponentID compID) const; - PelBuf getRecoBuf(const CompArea &blk); - const CPelBuf getRecoBuf(const CompArea &blk) const; - PelUnitBuf getRecoBuf(const UnitArea &unit); - const CPelUnitBuf getRecoBuf(const UnitArea &unit) const; - PelUnitBuf getRecoBuf(); - const CPelUnitBuf getRecoBuf() const; + PelBuf getRecoBuf(const ComponentID compID, bool wrap=false); + const CPelBuf getRecoBuf(const ComponentID compID, bool wrap=false) const; + PelBuf getRecoBuf(const CompArea &blk, bool wrap=false); + const CPelBuf getRecoBuf(const CompArea &blk, bool wrap=false) const; + PelUnitBuf getRecoBuf(const UnitArea &unit, bool wrap=false); + const CPelUnitBuf getRecoBuf(const UnitArea &unit, bool wrap=false) const; + 
PelUnitBuf getRecoBuf(bool wrap=false); + const CPelUnitBuf getRecoBuf(bool wrap=false) const; PelBuf getBuf(const ComponentID compID, const PictureType &type); const CPelBuf getBuf(const ComponentID compID, const PictureType &type) const; @@ -227,7 +145,7 @@ struct Picture : public UnitArea const CPelUnitBuf getBuf(const UnitArea &unit, const PictureType &type) const; void extendPicBorder(); - void finalInit(const SPS& sps, const PPS& pps, APS& aps); + void finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHeader *picHeader, APS** alfApss, APS* lmcsAps, APS* scalingListAps ); int getPOC() const { return poc; } void setBorderExtension( bool bFlag) { m_bIsBorderExtended = bFlag;} @@ -237,6 +155,21 @@ struct Picture : public UnitArea void setSpliceIdx(uint32_t idx, int poc) { m_spliceIdx[idx] = poc; } void createSpliceIdx(int nums); bool getSpliceFull(); + static void sampleRateConv( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale, + const CPelBuf& beforeScale, const int beforeScaleLeftOffset, const int beforeScaleTopOffset, + const PelBuf& afterScale, const int afterScaleLeftOffset, const int afterScaleTopOffset, + const int bitDepth, const bool useLumaFilter, const bool downsampling, + const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag ); + + static void rescalePicture( const std::pair<int, int> scalingRatio, + const CPelUnitBuf& beforeScaling, const Window& scalingWindowBefore, + const PelUnitBuf& afterScaling, const Window& scalingWindowAfter, + const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool useLumaFilter, const bool downsampling, + const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag ); + +private: + Window m_conformanceWindow; + Window m_scalingWindow; public: bool m_bIsBorderExtended; @@ -248,23 +181,26 @@ public: bool topField; bool fieldPic; int m_prevQP[MAX_NUM_CHANNEL_TYPE]; + bool precedingDRAP; // preceding a DRAP picture in decoding order int 
poc; uint32_t layer; uint32_t depth; + int layerId; + + bool subLayerNonReferencePictureDueToSTSA; int* m_spliceIdx; int m_ctuNums; + bool interLayerRefPicFlag; + #if ENABLE_SPLIT_PARALLELISM -#if ENABLE_WPP_PARALLELISM - PelStorage m_bufs[( PARL_SPLIT_MAX_NUM_JOBS * PARL_WPP_MAX_NUM_THREADS )][NUM_PIC_TYPES]; -#else PelStorage m_bufs[PARL_SPLIT_MAX_NUM_JOBS][NUM_PIC_TYPES]; -#endif #else PelStorage m_bufs[NUM_PIC_TYPES]; #endif + const Picture* unscaledPic; TComHash m_hashMap; TComHash* getHashMap() { return &m_hashMap; } @@ -275,13 +211,25 @@ public: std::deque<Slice*> slices; SEIMessages SEIs; + uint32_t getPicWidthInLumaSamples() const { return getRecoBuf( COMPONENT_Y ).width; } + uint32_t getPicHeightInLumaSamples() const { return getRecoBuf( COMPONENT_Y ).height; } + Window& getConformanceWindow() { return m_conformanceWindow; } + const Window& getConformanceWindow() const { return m_conformanceWindow; } + Window& getScalingWindow() { return m_scalingWindow; } + const Window& getScalingWindow() const { return m_scalingWindow; } +#if JVET_Q0487_SCALING_WINDOW_ISSUES + bool isRefScaled( const PPS* pps ) const { return getPicWidthInLumaSamples() != pps->getPicWidthInLumaSamples() || + getPicHeightInLumaSamples() != pps->getPicHeightInLumaSamples() || + getScalingWindow().getWindowLeftOffset() != pps->getScalingWindow().getWindowLeftOffset() || + getScalingWindow().getWindowRightOffset() != pps->getScalingWindow().getWindowRightOffset() || + getScalingWindow().getWindowTopOffset() != pps->getScalingWindow().getWindowTopOffset() || + getScalingWindow().getWindowBottomOffset() != pps->getScalingWindow().getWindowBottomOffset(); } +#endif + void allocateNewSlice(); Slice *swapSliceObject(Slice * p, uint32_t i); void clearSliceBuffer(); -#if HEVC_TILES_WPP - TileMap* tileMap; -#endif MCTSInfo mctsInfo; std::vector<AQpLayer*> aqlayer; @@ -293,11 +241,8 @@ private: #if ENABLE_SPLIT_PARALLELISM public: void finishParallelPart ( const UnitArea& ctuArea ); -#if 
ENABLE_WPP_PARALLELISM - void finishCtuPart ( const UnitArea& ctuArea ); -#endif #endif -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM public: Scheduler scheduler; #endif @@ -328,6 +273,28 @@ public: std::fill( m_alfCtuEnableFlag[compIdx].begin(), m_alfCtuEnableFlag[compIdx].end(), 0 ); } } + std::vector<short> m_alfCtbFilterIndex; + short* getAlfCtbFilterIndex() { return m_alfCtbFilterIndex.data(); } + std::vector<short>& getAlfCtbFilterIndexVec() { return m_alfCtbFilterIndex; } + void resizeAlfCtbFilterIndex(int numEntries) + { + m_alfCtbFilterIndex.resize(numEntries); + for (int i = 0; i < numEntries; i++) + { + m_alfCtbFilterIndex[i] = 0; + } + } + std::vector<uint8_t> m_alfCtuAlternative[MAX_NUM_COMPONENT]; + std::vector<uint8_t>& getAlfCtuAlternative( int compIdx ) { return m_alfCtuAlternative[compIdx]; } + uint8_t* getAlfCtuAlternativeData( int compIdx ) { return m_alfCtuAlternative[compIdx].data(); } + void resizeAlfCtuAlternative( int numEntries ) + { + for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ ) + { + m_alfCtuAlternative[compIdx].resize( numEntries ); + std::fill( m_alfCtuAlternative[compIdx].begin(), m_alfCtuAlternative[compIdx].end(), 0 ); + } + } }; int calcAndPrintHashStatus(const CPelUnitBuf& pic, const class SEIDecodedPictureHash* pictureHashSEI, const BitDepths &bitDepths, const MsgLevel msgl); diff --git a/source/Lib/CommonLib/ProfileLevelTier.cpp b/source/Lib/CommonLib/ProfileLevelTier.cpp new file mode 100644 index 0000000000000000000000000000000000000000..44f8a74770bd7b4fc9e05c7eec46fc95ee3a0c0d --- /dev/null +++ b/source/Lib/CommonLib/ProfileLevelTier.cpp @@ -0,0 +1,127 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. 
+ * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file ProfileLevelTier.cpp + \brief Handle profile, level and tier information. 
+*/ + + +#include "ProfileLevelTier.h" +#include "CommonLib/Slice.h" +#include <math.h> + +uint32_t +LevelTierFeatures::getMaxPicWidthInLumaSamples() const +{ + return uint32_t(sqrt(maxLumaPs*8.0)); +} + +uint32_t +LevelTierFeatures::getMaxPicHeightInLumaSamples() const +{ + return uint32_t(sqrt(maxLumaPs*8.0)); +} + +static const uint64_t MAX_CNFUINT64 = std::numeric_limits<uint64_t>::max(); + +static const LevelTierFeatures mainLevelTierInfo[] = +{ + // level , maxlumaps, maxcpb[tier],, maxSlice, tile rows, cols, maxLumaSr, maxBr[tier],, , minCr[tier],, + { Level::LEVEL1 , 36864, { 350, 0 }, 16, 1, 1, 552960ULL, { 128, 0 }, { 2, 2} }, + { Level::LEVEL2 , 122880, { 1500, 0 }, 16, 1, 1, 3686400ULL, { 1500, 0 }, { 2, 2} }, + { Level::LEVEL2_1, 245760, { 3000, 0 }, 20, 1, 1, 7372800ULL, { 3000, 0 }, { 2, 2} }, + { Level::LEVEL3 , 552960, { 6000, 0 }, 30, 2, 2, 16588800ULL, { 6000, 0 }, { 2, 2} }, + { Level::LEVEL3_1, 983040, { 10000, 0 }, 40, 3, 3, 33177600ULL, { 10000, 0 }, { 2, 2} }, + { Level::LEVEL4 , 2228224, { 12000, 30000 }, 75, 5, 5, 66846720ULL, { 12000, 30000 }, { 4, 4} }, + { Level::LEVEL4_1, 2228224, { 20000, 50000 }, 75, 5, 5, 133693440ULL, { 20000, 50000 }, { 4, 4} }, + { Level::LEVEL5 , 8912896, { 25000, 100000 }, 200, 11, 10, 267386880ULL, { 25000, 100000 }, { 6, 4} }, + { Level::LEVEL5_1, 8912896, { 40000, 160000 }, 200, 11, 10, 534773760ULL, { 40000, 160000 }, { 8, 4} }, + { Level::LEVEL5_2, 8912896, { 60000, 240000 }, 200, 11, 10, 1069547520ULL, { 60000, 240000 }, { 8, 4} }, + { Level::LEVEL6 , 35651584, { 60000, 240000 }, 600, 22, 20, 1069547520ULL, { 60000, 240000 }, { 8, 4} }, + { Level::LEVEL6_1, 35651584, { 120000, 480000 }, 600, 22, 20, 2139095040ULL, { 120000, 480000 }, { 8, 4} }, + { Level::LEVEL6_2, 35651584, { 240000, 800000 }, 600, 22, 20, 4278190080ULL, { 240000, 800000 }, { 6, 4} }, + { Level::LEVEL8_5, MAX_UINT, { MAX_UINT, MAX_UINT }, MAX_UINT, MAX_UINT, MAX_UINT, MAX_CNFUINT64, {MAX_UINT, MAX_UINT }, { 0, 0} }, + { Level::NONE } +}; 
+ +static const ProfileFeatures validProfiles[] = +{ // profile, pNameString, maxBitDepth, maxChrFmt, lvl8.5, cpbvcl, cpbnal, fcf*1000, mincr*10, levelInfo + { Profile::MAIN_10, "Main_10", 10, CHROMA_420, false, 1000, 1100, 1875, 10 , mainLevelTierInfo }, + { Profile::MAIN_444_10, "Main_444_10", 10, CHROMA_444, false, 2500, 2750, 3750, 5 , mainLevelTierInfo }, + { Profile::NONE, 0 } +}; + +void +ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps) +{ + const ProfileTierLevel &spsPtl =*(sps.getProfileTierLevel()); + + m_tier = spsPtl.getTierFlag(); + + // Identify the profile from the profile Idc, and possibly other constraints. + for(int32_t i=0; validProfiles[i].profile != Profile::NONE; i++) + { + if (spsPtl.getProfileIdc() == validProfiles[i].profile) + { + m_pProfile = &(validProfiles[i]); + break; + } + } + + if (m_pProfile != 0) + { + // Now identify the level: + const LevelTierFeatures *pLTF = m_pProfile->pLevelTiersListInfo; + const Level::Name spsLevelName = spsPtl.getLevelIdc(); + if (spsLevelName!=Level::LEVEL8_5 || m_pProfile->canUseLevel8p5) + { + for(int i=0; pLTF[i].level!=Level::NONE; i++) + { + if (pLTF[i].level == spsLevelName) + { + m_pLevelTier = &(pLTF[i]); + } + } + } + } +} + +double ProfileLevelTierFeatures::getMinCr() const +{ + return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx10 * m_pLevelTier->minCrBase[m_tier?1:0])/10.0 : 0.0 ; +} + +uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const +{ + return (m_pLevelTier!=0 && m_pProfile!=0) ? 
uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] : uint64_t(0); +} diff --git a/source/Lib/CommonLib/ProfileLevelTier.h b/source/Lib/CommonLib/ProfileLevelTier.h new file mode 100644 index 0000000000000000000000000000000000000000..6cd54107eff7b090a2e2b7685c8b70f5ac7ac592 --- /dev/null +++ b/source/Lib/CommonLib/ProfileLevelTier.h @@ -0,0 +1,102 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file ProfileLevelTier.h + \brief Handle profile, level and tier information. +*/ + +#ifndef __PROFILE_LEVEL_TIER__ +#define __PROFILE_LEVEL_TIER__ + +#if _MSC_VER > 1000 +#pragma once +#endif // _MSC_VER > 1000 + +#include "CommonLib/CommonDef.h" +#include <stdint.h> + +class SPS; // Forward declaration. + +struct LevelTierFeatures +{ + Level::Name level; + uint32_t maxLumaPs; + uint32_t maxCpb[Level::NUMBER_OF_TIERS]; // in units of CpbVclFactor or CpbNalFactor bits + uint32_t maxSliceSegmentsPerPicture; + uint32_t maxTileRows; + uint32_t maxTileCols; + uint64_t maxLumaSr; + uint32_t maxBr[Level::NUMBER_OF_TIERS]; // in units of BrVclFactor or BrNalFactor bits/s + uint32_t minCrBase[Level::NUMBER_OF_TIERS]; + uint32_t getMaxPicWidthInLumaSamples() const; + uint32_t getMaxPicHeightInLumaSamples() const; +}; + + +struct ProfileFeatures +{ + Profile::Name profile; + const char *pNameString; + uint32_t maxBitDepth; + ChromaFormat maxChromaFormat; + + bool canUseLevel8p5; + uint32_t cpbVclFactor; + uint32_t cpbNalFactor; + uint32_t formatCapabilityFactorx1000; + uint32_t minCrScaleFactorx10; + const LevelTierFeatures *pLevelTiersListInfo; +}; + + +class ProfileLevelTierFeatures +{ + private: + const ProfileFeatures *m_pProfile; + const LevelTierFeatures *m_pLevelTier; + Level::Tier m_tier; + public: + ProfileLevelTierFeatures() : m_pProfile(0), m_pLevelTier(0), m_tier(Level::MAIN) { } + + void extractPTLInformation(const 
SPS &sps); + + const ProfileFeatures *getProfileFeatures() const { return m_pProfile; } + const LevelTierFeatures *getLevelTierFeatures() const { return m_pLevelTier; } + Level::Tier getTier() const { return m_tier; } + uint64_t getCpbSizeInBits() const; + double getMinCr() const; +}; + + +#endif + diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp index 04f9edb9eae2d7e52507a5be9c04476cabf2eabf..b46a46267666f9d44f13ca1d62e4ffd17e21f79a 100644 --- a/source/Lib/CommonLib/Quant.cpp +++ b/source/Lib/CommonLib/Quant.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,37 +63,39 @@ // ==================================================================================================================== QpParam::QpParam(const int qpy, - const ChannelType chType, + const ComponentID compID, const int qpBdOffset, + const int minQpPrimeTsMinus4, const int chromaQPOffset, const ChromaFormat chFmt, - const int dqp ) + const int dqp + , const SPS *sps +) { int baseQp; - - if(isLuma(chType)) + if (isLuma(compID)) { baseQp = qpy + qpBdOffset; } else { - baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset ); - - if(baseQp < 0) - { - baseQp = baseQp + qpBdOffset; - } - else - { - baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset; - } + int qpi = Clip3(-qpBdOffset, MAX_QP, qpy); + baseQp = sps->getMappedChromaQpValue(compID, qpi); + baseQp = Clip3(-qpBdOffset, MAX_QP, baseQp + chromaQPOffset) + qpBdOffset; } baseQp = Clip3( 0, MAX_QP+qpBdOffset, baseQp + dqp ); - Qp =baseQp; - per=baseQp/6; - rem=baseQp%6; + Qps[0] =baseQp; + pers[0]=baseQp/6; + rems[0]=baseQp%6; + + int baseQpTS = baseQp; + baseQpTS = std::max(baseQpTS, 4 + minQpPrimeTsMinus4); + + Qps[1] = baseQpTS; + 
pers[1] = baseQpTS / 6; + rems[1] = baseQpTS % 6; } QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int QP /*= -MAX_INT*/) @@ -103,18 +105,18 @@ QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int if (isChroma(compID)) { - chromaQpOffset += tu.cs->pps->getQpOffset( compID ); - chromaQpOffset += tu.cs->slice->getSliceChromaQpDelta( compID ); - chromaQpOffset += tu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListEntry( tu.cu->chromaQpAdj ).u.offset[int( compID ) - 1]; + const bool useJQP = ( abs(TU::getICTMode(tu)) == 2 ); + + chromaQpOffset += tu.cs->pps->getQpOffset ( useJQP ? JOINT_CbCr : compID ); + chromaQpOffset += tu.cu->slice->getSliceChromaQpDelta( useJQP ? JOINT_CbCr : compID ); + + chromaQpOffset += tu.cs->pps->getChromaQpOffsetListEntry( tu.cu->chromaQpAdj ).u.offset[int( useJQP ? JOINT_CbCr : compID ) - 1]; } -#if HM_QTBT_AS_IN_JEM_QUANT int dqp = 0; -#else - int dqp = ( TU::needsQP3Offset(tu, compID) ? -3 : 0 ); -#endif - *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, toChannelType(compID), tu.cs->sps->getQpBDOffset(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp); + const bool useJQP = isChroma(compID) && (abs(TU::getICTMode(tu)) == 2); + *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, useJQP ? 
JOINT_CbCr : compID, tu.cs->sps->getQpBDOffset(toChannelType(compID)), tu.cs->sps->getMinQpPrimeTsMinus4(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp, tu.cs->sps); } @@ -124,20 +126,93 @@ QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int Quant::Quant( const Quant* other ) { -#if HEVC_USE_SCALING_LISTS xInitScalingList( other ); -#endif } Quant::~Quant() { -#if HEVC_USE_SCALING_LISTS xDestroyScalingList(); -#endif } +void invResDPCM( const TransformUnit &tu, const ComponentID &compID, CoeffBuf &dstBuf ) +{ + const CompArea &rect = tu.blocks[compID]; + const int wdt = rect.width; + const int hgt = rect.height; + const CCoeffBuf coeffs = tu.getCoeffs(compID); + + const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID)); + const TCoeff inputMinimum = -(1 << maxLog2TrDynamicRange); + const TCoeff inputMaximum = (1 << maxLog2TrDynamicRange) - 1; + + const TCoeff* coef = &coeffs.buf[0]; + TCoeff* dst = &dstBuf.buf[0]; + + if( isLuma(compID) ? tu.cu->bdpcmMode == 1 : tu.cu->bdpcmModeChroma == 1) + { + for( int y = 0; y < hgt; y++ ) + { + dst[0] = coef[0]; + for( int x = 1; x < wdt; x++ ) + { + dst[x] = Clip3(inputMinimum, inputMaximum, dst[x - 1] + coef[x]); + } + coef += coeffs.stride; + dst += dstBuf.stride; + } + } + else + { + for( int x = 0; x < wdt; x++ ) + { + dst[x] = coef[x]; + } + for( int y = 0; y < hgt - 1; y++ ) + { + for( int x = 0; x < wdt; x++ ) + { + dst[dstBuf.stride + x] = Clip3(inputMinimum, inputMaximum, dst[x] + coef[coeffs.stride + x]); + } + coef += coeffs.stride; + dst += dstBuf.stride; + } + } +} + +void fwdResDPCM( TransformUnit &tu, const ComponentID &compID ) +{ + const CompArea &rect = tu.blocks[compID]; + const int wdt = rect.width; + const int hgt = rect.height; + CoeffBuf coeffs = tu.getCoeffs(compID); + + TCoeff* coef = &coeffs.buf[0]; + + if( isLuma(compID) ? 
tu.cu->bdpcmMode == 1 : tu.cu->bdpcmModeChroma == 1) + { + for( int y = 0; y < hgt; y++ ) + { + for( int x = wdt - 1; x > 0; x-- ) + { + coef[x] -= coef[x - 1]; + } + coef += coeffs.stride; + } + } + else + { + coef += coeffs.stride * (hgt - 1); + for( int y = 0; y < hgt - 1; y++ ) + { + for ( int x = 0; x < wdt; x++ ) + { + coef[x] -= coef[x - coeffs.stride]; + } + coef -= coeffs.stride; + } + } +} -#if HEVC_USE_SIGN_HIDING // To minimize the distortion only. No rate is considered. void Quant::xSignBitHidingHDQ( TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* deltaU, const CoeffCodingContext& cctx, const int maxLog2TrDynamicRange ) { @@ -272,7 +347,6 @@ void Quant::xSignBitHidingHDQ( TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* delt return; } -#endif void Quant::dequant(const TransformUnit &tu, CoeffBuf &dstCoeff, @@ -283,48 +357,43 @@ void Quant::dequant(const TransformUnit &tu, const CompArea &area = tu.blocks[compID]; const uint32_t uiWidth = area.width; const uint32_t uiHeight = area.height; - const TCoeff *const piQCoef = tu.getCoeffs(compID).buf; TCoeff *const piCoef = dstCoeff.buf; const uint32_t numSamplesInBlock = uiWidth * uiHeight; const int maxLog2TrDynamicRange = sps->getMaxLog2TrDynamicRange(toChannelType(compID)); const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange); const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1; -#if HEVC_USE_SCALING_LISTS - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (tu.transformSkip[compID] != 0)); + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? 
tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST); const int scalingListType = getScalingListType(tu.cu->predMode, compID); -#endif const int channelBitDepth = sps->getBitDepth(toChannelType(compID)); -#if HEVC_USE_SCALING_LISTS + const TCoeff *coef; + if ((tu.cu->bdpcmMode && isLuma(compID)) || ( tu.cu->bdpcmModeChroma && isChroma(compID) )) + { + invResDPCM( tu, compID, dstCoeff ); + coef = piCoef; + } + else + { + coef = tu.getCoeffs(compID).buf; + } + const TCoeff *const piQCoef = coef; CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); -#endif CHECK(uiWidth > m_uiMaxTrSize, "Unsupported transformation size"); // Represents scaling through forward transform - const bool bClipTransformShiftTo0 = tu.mtsIdx!=1 && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); + const bool bClipTransformShiftTo0 = tu.mtsIdx[compID] != MTS_SKIP && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); const int originalTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange); - const int iTransformShift = bClipTransformShiftTo0 ? std::max<int>(0, originalTransformShift) : originalTransformShift; - - const int QP_per = cQP.per; - const int QP_rem = cQP.rem; - -#if HM_QTBT_AS_IN_JEM_QUANT - const bool needsScalingCorrection = TU::needsBlockSizeTrafoScale( tu, compID ); - const int NEScale = TU::needsSqrt2Scale( tu, compID ) ? 181 : 1; -#if HEVC_USE_SCALING_LISTS - const int rightShift = (needsScalingCorrection ? 8 : 0 ) + (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); -#else - const int rightShift = (needsScalingCorrection ? 
8 : 0 ) + (IQUANT_SHIFT - (iTransformShift + QP_per)); -#endif -#else -#if HEVC_USE_SCALING_LISTS - const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); -#else - const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)); -#endif -#endif + const bool needSqrtAdjustment = TU::needsBlockSizeTrafoScale( tu, compID ); + const int iTransformShift = (bClipTransformShiftTo0 ? std::max<int>(0, originalTransformShift) : originalTransformShift) + (needSqrtAdjustment?-1:0); + + const int QP_per = cQP.per(isTransformSkip); + const int QP_rem = cQP.rem(isTransformSkip); + + const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); -#if HEVC_USE_SCALING_LISTS if(enableScalingLists) { //from the dequantization equation: @@ -336,9 +405,9 @@ void Quant::dequant(const TransformUnit &tu, const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; - const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth]; - const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight]; - int *piDequantCoef = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth - 1, uiLog2TrHeight - 1); + const uint32_t uiLog2TrWidth = floorLog2(uiWidth); + const uint32_t uiLog2TrHeight = floorLog2(uiHeight); + int *piDequantCoef = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth, uiLog2TrHeight); if(rightShift > 0) { @@ -347,11 +416,7 @@ void Quant::dequant(const TransformUnit &tu, for( int n = 0; n < numSamplesInBlock; n++ ) { const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); -#if HM_QTBT_AS_IN_JEM_QUANT - const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n] * NEScale) + iAdd ) >> rightShift; -#else const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + 
iAdd ) >> rightShift; -#endif piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); } @@ -363,11 +428,7 @@ void Quant::dequant(const TransformUnit &tu, for( int n = 0; n < numSamplesInBlock; n++ ) { const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); -#if HM_QTBT_AS_IN_JEM_QUANT - const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n] * NEScale) << leftShift; -#else const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift; -#endif piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); } @@ -375,12 +436,7 @@ void Quant::dequant(const TransformUnit &tu, } else { -#endif -#if HM_QTBT_AS_IN_JEM_QUANT - const int scale = g_invQuantScales[QP_rem] * NEScale; -#else - const int scale = g_invQuantScales[QP_rem]; -#endif + const int scale = g_invQuantScales[needSqrtAdjustment?1:0][QP_rem]; const int scaleBits = ( IQUANT_SHIFT + 1 ); //from the dequantisation equation: @@ -414,9 +470,7 @@ void Quant::dequant(const TransformUnit &tu, piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); } } -#if HEVC_USE_SCALING_LISTS } -#endif } void Quant::init( uint32_t uiMaxTrSize, @@ -437,6 +491,7 @@ void Quant::init( uint32_t uiMaxTrSize, #if T0196_SELECTIVE_RDOQ m_useSelectiveRDOQ = useSelectiveRDOQ; #endif + m_resetStore = true; } #if ENABLE_SPLIT_PARALLELISM @@ -447,7 +502,6 @@ void Quant::copyState( const Quant& other ) } #endif -#if HEVC_USE_SCALING_LISTS /** set quantized matrix coefficient for encode * \param scalingList quantized matrix address * \param format chroma format @@ -459,14 +513,39 @@ void Quant::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicR const int minimumQp = 0; const int maximumQp = SCALING_LIST_REM_NUM; - for(uint32_t size = 0; size < SCALING_LIST_SIZE_NUM; size++) + int scalingListId = 0; + int recScalingListId = 0; + for(uint32_t 
size = SCALING_LIST_FIRST_CODED; size <= SCALING_LIST_LAST_CODED; size++) //2x2->64x64 { for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) { + if ((size == SCALING_LIST_2x2 && list < 4) || (size == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0)) // skip 2x2 luma + continue; for(int qp = minimumQp; qp < maximumQp; qp++) { - xSetScalingListEnc(scalingList,list,size,qp); - xSetScalingListDec(*scalingList,list,size,qp); + xSetScalingListEnc(scalingList, list, size, qp, scalingListId); + xSetScalingListDec(*scalingList, list, size, qp, scalingListId); + } + scalingListId++; + } + } + //based on square result and apply downsample technology + for (uint32_t sizew = 0; sizew <= SCALING_LIST_LAST_CODED; sizew++) //7 + { + for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7 + { + if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue; + for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9 + { + int largerSide = (sizew > sizeh) ? sizew : sizeh; + if (largerSide == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) continue; + if (largerSide < SCALING_LIST_4x4) printf("Rectangle Error !\n"); + recScalingListId = SCALING_LIST_NUM * (largerSide - 2) + 2 + (list / ((largerSide == SCALING_LIST_64x64) ? 
3 : 1)); + for (int qp = minimumQp; qp < maximumQp; qp++) + { + xSetRecScalingListEnc(scalingList, list, sizew, sizeh, qp, recScalingListId); + xSetRecScalingListDec(*scalingList, list, sizew, sizeh, qp, recScalingListId); + } } } } @@ -480,13 +559,38 @@ void Quant::setScalingListDec(const ScalingList &scalingList) const int minimumQp = 0; const int maximumQp = SCALING_LIST_REM_NUM; - for(uint32_t size = 0; size < SCALING_LIST_SIZE_NUM; size++) + int scalingListId = 0; + int recScalingListId = 0; + for (uint32_t size = SCALING_LIST_FIRST_CODED; size <= SCALING_LIST_LAST_CODED; size++) { for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) { + if ((size == SCALING_LIST_2x2 && list < 4) || (size == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0)) // skip 2x2 luma + continue; for(int qp = minimumQp; qp < maximumQp; qp++) { - xSetScalingListDec(scalingList,list,size,qp); + xSetScalingListDec(scalingList, list, size, qp, scalingListId); + } + scalingListId++; + } + } + //based on square result and apply downsample technology + //based on square result and apply downsample technology + for (uint32_t sizew = 0; sizew <= SCALING_LIST_LAST_CODED; sizew++) //7 + { + for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7 + { + if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue; + for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9 + { + int largerSide = (sizew > sizeh) ? sizew : sizeh; + if (largerSide == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) continue; + if (largerSide < SCALING_LIST_4x4) printf("Rectangle Error !\n"); + recScalingListId = SCALING_LIST_NUM * (largerSide - 2) + 2 + (list / ((largerSide == SCALING_LIST_64x64) ? 
3 : 1)); + for (int qp = minimumQp; qp < maximumQp; qp++) + { + xSetRecScalingListDec(scalingList, list, sizew, sizeh, qp, recScalingListId); + } } } } @@ -500,23 +604,24 @@ void Quant::setScalingListDec(const ScalingList &scalingList) * \param qp Quantization parameter * \param format chroma format */ -void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeId, int qp) +void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeId, int qp, uint32_t scalingListId) { uint32_t width = g_scalingListSizeX[sizeId]; uint32_t height = g_scalingListSizeX[sizeId]; uint32_t ratio = g_scalingListSizeX[sizeId]/std::min(MAX_MATRIX_SIZE_NUM,(int)g_scalingListSizeX[sizeId]); int *quantcoeff; - int *coeff = scalingList->getScalingListAddress(sizeId,listId); + int *coeff = scalingList->getScalingListAddress(scalingListId); quantcoeff = getQuantCoeff(listId, qp, sizeId, sizeId); - int quantScales = g_quantScales[qp]; + const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; + int quantScales = g_quantScales[blockIsNotPowerOf4?1:0][qp]; processScalingListEnc(coeff, quantcoeff, (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE), height, width, ratio, std::min(MAX_MATRIX_SIZE_NUM, (int)g_scalingListSizeX[sizeId]), - scalingList->getScalingListDC(sizeId,listId)); + scalingList->getScalingListDC(scalingListId)); } /** set quantized matrix coefficient for decode @@ -526,26 +631,81 @@ void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32 * \param qp Quantization parameter * \param format chroma format */ -void Quant::xSetScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeId, int qp) +void Quant::xSetScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeId, int qp, uint32_t scalingListId) { uint32_t width = g_scalingListSizeX[sizeId]; uint32_t height = g_scalingListSizeX[sizeId]; uint32_t ratio = 
g_scalingListSizeX[sizeId]/std::min(MAX_MATRIX_SIZE_NUM,(int)g_scalingListSizeX[sizeId]); int *dequantcoeff; - const int *coeff = scalingList.getScalingListAddress(sizeId,listId); + const int *coeff = scalingList.getScalingListAddress(scalingListId); dequantcoeff = getDequantCoeff(listId, qp, sizeId, sizeId); - int invQuantScale = g_invQuantScales[qp]; + const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; + int invQuantScale = g_invQuantScales[blockIsNotPowerOf4?1:0][qp]; processScalingListDec(coeff, dequantcoeff, invQuantScale, height, width, ratio, std::min(MAX_MATRIX_SIZE_NUM, (int)g_scalingListSizeX[sizeId]), - scalingList.getScalingListDC(sizeId,listId)); + scalingList.getScalingListDC(scalingListId)); } +/** set quantized matrix coefficient for encode +* \param scalingList quantized matrix address +* \param listId List index +* \param sizeId size index +* \param qp Quantization parameter +* \param format chroma format +*/ +void Quant::xSetRecScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeIdw, uint32_t sizeIdh, int qp, uint32_t scalingListId) +{ + if (sizeIdw == sizeIdh) return; + + uint32_t width = g_scalingListSizeX[sizeIdw]; + uint32_t height = g_scalingListSizeX[sizeIdh]; + uint32_t largeSideId = (sizeIdw > sizeIdh) ? sizeIdw : sizeIdh; //16 + int *quantcoeff; + int *coeff = scalingList->getScalingListAddress(scalingListId);//4x4, 8x8 + quantcoeff = getQuantCoeff(listId, qp, sizeIdw, sizeIdh);//final quantCoeff (downsample) + const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; + int quantScales = g_quantScales[blockIsNotPowerOf4?1:0][qp]; + + processScalingListEnc(coeff, + quantcoeff, + (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE), + height, width, + ((largeSideId>3) ? 2 : 1), + ((largeSideId >= 3) ? 
8 : 4), + scalingList->getScalingListDC(scalingListId)); +} +/** set quantized matrix coefficient for decode +* \param scalingList quantaized matrix address +* \param listId List index +* \param sizeId size index +* \param qp Quantization parameter +* \param format chroma format +*/ +void Quant::xSetRecScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeIdw, uint32_t sizeIdh, int qp, uint32_t scalingListId) +{ + if (sizeIdw == sizeIdh) return; + uint32_t width = g_scalingListSizeX[sizeIdw]; + uint32_t height = g_scalingListSizeX[sizeIdh]; + uint32_t largeSideId = (sizeIdw > sizeIdh) ? sizeIdw : sizeIdh; //16 + + const int *coeff = scalingList.getScalingListAddress(scalingListId); + int *dequantcoeff; + dequantcoeff = getDequantCoeff(listId, qp, sizeIdw, sizeIdh); + const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; + int invQuantScale = g_invQuantScales[blockIsNotPowerOf4 ? 1 : 0][qp]; + processScalingListDec(coeff, + dequantcoeff, + invQuantScale, + height, width, (largeSideId>3) ? 2 : 1, + (largeSideId >= 3 ? 
8 : 4), + scalingList.getScalingListDC(scalingListId)); +} /** set flat matrix value to quantized coefficient */ void Quant::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) @@ -580,8 +740,9 @@ void Quant::xSetFlatScalingList(uint32_t list, uint32_t sizeX, uint32_t sizeY, i int *quantcoeff; int *dequantcoeff; - int quantScales = g_quantScales [qp]; - int invQuantScales = g_invQuantScales[qp] << 4; + const bool blockIsNotPowerOf4 = ((floorLog2(g_scalingListSizeX[sizeX]) + floorLog2(g_scalingListSizeX[sizeY])) & 1) == 1; + int quantScales = g_quantScales [blockIsNotPowerOf4?1:0][qp]; + int invQuantScales = g_invQuantScales[blockIsNotPowerOf4?1:0][qp] << 4; quantcoeff = getQuantCoeff(list, qp, sizeX, sizeY); dequantcoeff = getDequantCoeff(list, qp, sizeX, sizeY); @@ -605,6 +766,37 @@ void Quant::xSetFlatScalingList(uint32_t list, uint32_t sizeX, uint32_t sizeY, i */ void Quant::processScalingListEnc( int *coeff, int *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc) { + if (height != width) + { + for (uint32_t j = 0; j<height; j++) + { + for (uint32_t i = 0; i<width; i++) + { + if (j >= JVET_C0024_ZERO_OUT_TH || i >= JVET_C0024_ZERO_OUT_TH) + { + quantcoeff[j*width + i] = 0; + continue; + } + int ratioWH = (height>width) ? height / width : width / height; + int ratioH = (height / sizuNum) ? (height / sizuNum) : (sizuNum / height); // 32/8 = 4 + int ratioW = (width / sizuNum) ? (width / sizuNum) : (sizuNum / width); //16/8 = 2 //sizeNum = 8/4 + if (height > width) + { + quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j / ratioH) + ((i * ratioWH) / ratioH)]; + } + else //ratioH < ratioW + { + quantcoeff[j*width + i] = quantScales / coeff[sizuNum * ((j * ratioWH) / ratioW) + (i / ratioW)]; + } + int largeOne = (width > height) ? 
width : height; + if (largeOne>8) + { + quantcoeff[0] = quantScales / dc; + } + } + } + return; + } for(uint32_t j=0;j<height;j++) { for(uint32_t i=0;i<width;i++) @@ -619,27 +811,63 @@ void Quant::processScalingListEnc( int *coeff, int *quantcoeff, int quantScales, } } -/** set quantized matrix coefficient for decode - * \param coeff quantaized matrix address - * \param dequantcoeff quantaized matrix address - * \param invQuantScales IQ(QP%6)) - * \param height height - * \param width width - * \param ratio ratio for upscale - * \param sizuNum matrix size - * \param dc dc parameter - */ -void Quant::processScalingListDec( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc) +void Quant::processScalingListDec( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizeNum, uint32_t dc) { - for(uint32_t j=0;j<height;j++) + if (height != width) { - for(uint32_t i=0;i<width;i++) + int ratioWH = (height > width ) ? (height / width ) : (width / height); + int ratioH = (height / sizeNum) ? (height / sizeNum) : (sizeNum / height); + int ratioW = (width / sizeNum) ? 
(width / sizeNum) : (sizeNum / width ); + if (height > width) + { + for (uint32_t j = 0; j < height; j++) + { + int coeffLineSep = (j / ratioH) * sizeNum; + int dequantCoeffLineSep = j * width; + for (uint32_t i = 0; i < width; i++) + { + if (i >= JVET_C0024_ZERO_OUT_TH || j >= JVET_C0024_ZERO_OUT_TH) + { + dequantcoeff[dequantCoeffLineSep + i] = 0; + continue; + } + dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + ((i * ratioWH) / ratioH)]; + } + } + } + else //ratioH < ratioW + { + for (uint32_t j = 0; j < height; j++) + { + int coeffLineSep = ((j * ratioWH) / ratioW) * sizeNum; + int dequantCoeffLineSep = j * width; + for (uint32_t i = 0; i < width; i++) + { + if (i >= JVET_C0024_ZERO_OUT_TH || j >= JVET_C0024_ZERO_OUT_TH) + { + dequantcoeff[dequantCoeffLineSep + i] = 0; + continue; + } + dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + (i / ratioW)]; + } + } + } + int largeOne = (width > height) ? width : height; + if (largeOne > 8) + dequantcoeff[0] = invQuantScales * dc; + return; + } + for (uint32_t j = 0; j<height; j++) + { + int coeffLineSep = (j / ratio) * sizeNum; + int dequantCoeffLineSep = j * width; + for (uint32_t i = 0; i<width; i++) { - dequantcoeff[j*width + i] = invQuantScales * coeff[sizuNum * (j / ratio) + i / ratio]; + dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + i / ratio]; } } - if(ratio > 1) + if (ratio > 1) { dequantcoeff[0] = invQuantScales * dc; } @@ -651,6 +879,29 @@ void Quant::xInitScalingList( const Quant* other ) { m_isScalingListOwner = other == nullptr; + size_t numElements = 0; + + for (uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) + { + for (uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) + { + for (uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) + { + for (uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) + { + numElements += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; + } + } 
+ } + } + + if (m_isScalingListOwner) + { + m_quantCoef[0][0][0][0] = new int[2 * numElements]; + } + + size_t offset = 0; + for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) { for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) @@ -661,8 +912,10 @@ void Quant::xInitScalingList( const Quant* other ) { if( m_isScalingListOwner ) { - m_quantCoef [sizeIdX][sizeIdY][listId][qp] = new int [g_scalingListSizeX[sizeIdX]*g_scalingListSizeX[sizeIdY]]; - m_dequantCoef [sizeIdX][sizeIdY][listId][qp] = new int [g_scalingListSizeX[sizeIdX]*g_scalingListSizeX[sizeIdY]]; + m_quantCoef[sizeIdX][sizeIdY][listId][qp] = m_quantCoef[0][0][0][0] + offset; + offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; + m_dequantCoef[sizeIdX][sizeIdY][listId][qp] = m_quantCoef[0][0][0][0] + offset; + offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; } else { @@ -673,6 +926,8 @@ void Quant::xInitScalingList( const Quant* other ) } } } + + m_pairCheck = 0; } /** destroy quantization matrix array @@ -681,129 +936,86 @@ void Quant::xDestroyScalingList() { if( !m_isScalingListOwner ) return; - for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) - { - for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) - { - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) - { - for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) - { - if(m_quantCoef[sizeIdX][sizeIdY][listId][qp]) - { - delete [] m_quantCoef[sizeIdX][sizeIdY][listId][qp]; - } - if(m_dequantCoef[sizeIdX][sizeIdY][listId][qp]) - { - delete [] m_dequantCoef[sizeIdX][sizeIdY][listId][qp]; - } - } - } - } - } + delete[] m_quantCoef[0][0][0][0]; } -#endif void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx) { const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; -#if HEVC_USE_SCALING_LISTS || HEVC_USE_SIGN_HIDING const uint32_t 
uiWidth = rect.width; const uint32_t uiHeight = rect.height; -#endif const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); const CCoeffBuf &piCoef = pSrc; CoeffBuf piQCoef = tu.getCoeffs(compID); - const bool useTransformSkip = tu.mtsIdx==1; + const bool useTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); { -#if HEVC_USE_SIGN_HIDING - CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag()); -#else - CoeffCodingContext cctx(tu, compID); -#endif + CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag()); const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; -#if HEVC_USE_SIGN_HIDING TCoeff deltaU[MAX_TB_SIZEY * MAX_TB_SIZEY]; -#endif -#if HEVC_USE_SCALING_LISTS int scalingListType = getScalingListType(tu.cu->predMode, compID); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); - const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth]; - const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight]; - int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1); + const uint32_t uiLog2TrWidth = floorLog2(uiWidth); + const uint32_t uiLog2TrHeight = floorLog2(uiHeight); + int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight); - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, useTransformSkip); -#endif - const int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? 
tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, useTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST); - /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be - * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the - * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) - * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result - */ - // Represents scaling through forward transform - int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); + // for blocks that where width*height != 4^N, the effective scaling applied during transformation cannot be + // compensated by a bit-shift (the quantised result will be sqrt(2) * larger than required). + // The quantScale table and shift is used to compensate for this. + const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID ); + const int defaultQuantisationCoefficient = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)]; + int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + ( needSqrtAdjustment?-1:0); if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) { iTransformShift = std::max<int>(0, iTransformShift); } - int iWHScale = 1; -#if HM_QTBT_AS_IN_JEM_QUANT - if( TU::needsBlockSizeTrafoScale( tu, compID ) ) - { - iTransformShift += ADJ_QUANT_SHIFT; - iWHScale = 181; - } -#endif - - const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; + const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 
0 : iTransformShift); // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset const int64_t iAdd = int64_t(tu.cs->slice->isIRAP() ? 171 : 85) << int64_t(iQBits - 9); -#if HEVC_USE_SIGN_HIDING const int qBits8 = iQBits - 8; -#endif - for (int uiBlockPos = 0; uiBlockPos < piQCoef.area(); uiBlockPos++) + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + const int maxNumberOfCoeffs = lfnstIdx > 0 ? ((( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8) ) ? 8 : 16) : piQCoef.area(); + memset( piQCoef.buf, 0, sizeof(TCoeff) * piQCoef.area() ); + for (int uiBlockPos = 0; uiBlockPos < maxNumberOfCoeffs; uiBlockPos++ ) { const TCoeff iLevel = piCoef.buf[uiBlockPos]; const TCoeff iSign = (iLevel < 0 ? -1: 1); -#if HEVC_USE_SCALING_LISTS const int64_t tmpLevel = (int64_t)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient); -#else - const int64_t tmpLevel = (int64_t)abs(iLevel) * defaultQuantisationCoefficient; -#endif - const TCoeff quantisedMagnitude = TCoeff((tmpLevel * iWHScale + iAdd ) >> iQBits); -#if HEVC_USE_SIGN_HIDING - deltaU[uiBlockPos] = (TCoeff)((tmpLevel * iWHScale - ((int64_t)quantisedMagnitude<<iQBits) )>> qBits8); -#endif + const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits); + deltaU[uiBlockPos] = (TCoeff)((tmpLevel - ((int64_t)quantisedMagnitude<<iQBits) )>> qBits8); uiAbsSum += quantisedMagnitude; const TCoeff quantisedCoefficient = quantisedMagnitude * iSign; piQCoef.buf[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient ); } // for n -#if HEVC_USE_SIGN_HIDING - if( cctx.signHiding() && uiWidth>=4 && uiHeight>=4 ) + if ((tu.cu->bdpcmMode && isLuma(compID)) || (tu.cu->bdpcmModeChroma && isChroma(compID)) ) + { + fwdResDPCM( tu, compID ); + } + if( cctx.signHiding() ) { if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested 
{ xSignBitHidingHDQ(piQCoef.buf, piCoef.buf, deltaU, cctx, maxLog2TrDynamicRange); } } -#endif } //if RDOQ //return; } @@ -812,53 +1024,42 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff { const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; -#if HEVC_USE_SCALING_LISTS const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; -#endif const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); const CCoeffBuf piCoef = pSrc; - const bool useTransformSkip = tu.mtsIdx==1; + const bool useTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); -#if HEVC_USE_SCALING_LISTS int scalingListType = getScalingListType(tu.cu->predMode, compID); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); - const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth]; - const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight]; - int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1); + const uint32_t uiLog2TrWidth = floorLog2(uiWidth); + const uint32_t uiLog2TrHeight = floorLog2(uiHeight); + int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight); - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (useTransformSkip != 0)); -#endif - const int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (useTransformSkip != 0), tu.cu->lfnstIdx > 0, disableSMForLFNST); /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). 
Alternatively, adjust the * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result */ - - // Represents scaling through forward transform - int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); + const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID ); + const int defaultQuantisationCoefficient = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)]; + int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + (needSqrtAdjustment?-1:0); if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) { iTransformShift = std::max<int>(0, iTransformShift); } - int iWHScale = 1; -#if HM_QTBT_AS_IN_JEM_QUANT - if( TU::needsBlockSizeTrafoScale( tu, compID ) ) - { - iTransformShift += ADJ_QUANT_SHIFT; - iWHScale = 181; - } -#endif - const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; + const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift; + assert(iQBits>=0); // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset // iAdd is different from the iAdd used in normal quantization @@ -867,12 +1068,8 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff for (int uiBlockPos = 0; uiBlockPos < rect.area(); uiBlockPos++) { const TCoeff iLevel = piCoef.buf[uiBlockPos]; -#if HEVC_USE_SCALING_LISTS const int64_t tmpLevel = (int64_t)abs(iLevel) * (enableScalingLists ? 
piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient); -#else - const int64_t tmpLevel = (int64_t)abs(iLevel) * defaultQuantisationCoefficient; -#endif - const TCoeff quantisedMagnitude = TCoeff((tmpLevel * iWHScale + iAdd ) >> iQBits); + const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits); if (quantisedMagnitude != 0) { @@ -887,34 +1084,29 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co { const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; -#if HEVC_USE_SCALING_LISTS const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; -#endif const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); -#if HEVC_USE_SCALING_LISTS const int scalingListType = getScalingListType(tu.cu->predMode, compID); - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true); -#endif - const int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? 
tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true, tu.cu->lfnstIdx > 0, disableSMForLFNST); + const bool useTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + const int defaultQuantisationCoefficient = g_quantScales[0][cQP.rem(useTransformSkip)]; -#if HEVC_USE_SCALING_LISTS CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" ); - const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth]; - const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight]; - const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1); -#endif + const uint32_t uiLog2TrWidth = floorLog2(uiWidth); + const uint32_t uiLog2TrHeight = floorLog2(uiHeight); + const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight); /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result */ - - const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; + const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift); // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset const int iAdd = int64_t(bUseHalfRoundingPoint ? 256 : (tu.cs->slice->isIRAP() ? 171 : 85)) << int64_t(iQBits - 9); TCoeff transformedCoefficient; @@ -934,13 +1126,9 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co // quantization const TCoeff iSign = (transformedCoefficient < 0 ? 
-1: 1); -#if HEVC_USE_SCALING_LISTS const int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient; const int64_t tmpLevel = (int64_t)abs(transformedCoefficient) * quantisationCoefficient; -#else - const int64_t tmpLevel = (int64_t)abs(transformedCoefficient) * defaultQuantisationCoefficient; -#endif const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign; @@ -953,25 +1141,21 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp { const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; -#if HEVC_USE_SCALING_LISTS const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; -#endif - const int QP_per = cQP.per; - const int QP_rem = cQP.rem; + const int QP_per = cQP.per(tu.mtsIdx[compID] == MTS_SKIP); + const int QP_rem = cQP.rem(tu.mtsIdx[compID] == MTS_SKIP); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID)); const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); -#if HEVC_USE_SCALING_LISTS const int scalingListType = getScalingListType(tu.cu->predMode, compID); - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true); + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true, tu.cu->lfnstIdx > 0, disableSMForLFNST); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); - const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? 
LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); -#else - const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)); -#endif + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange); const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1; @@ -980,7 +1164,6 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp TCoeff dequantisedSample; -#if HEVC_USE_SCALING_LISTS if (enableScalingLists) { const uint32_t dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS; @@ -989,9 +1172,9 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; - const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth]; - const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight]; - int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrWidth-1, uiLog2TrHeight-1); + const uint32_t uiLog2TrWidth = floorLog2(uiWidth); + const uint32_t uiLog2TrHeight = floorLog2(uiHeight); + int *piDequantCoef = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth, uiLog2TrHeight); if (rightShift > 0) { @@ -1012,8 +1195,7 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp } else { -#endif - const int scale = g_invQuantScales[QP_rem]; + const int scale = g_invQuantScales[0][QP_rem]; const int scaleBits = (IQUANT_SHIFT + 1); const uint32_t targetInputBitDepth = std::min<uint32_t>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits)); @@ -1036,23 +1218,42 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum, 
transformMaximum, iCoeffQ)); } -#if HEVC_USE_SCALING_LISTS } -#endif // Inverse transform-skip + reconSample = Pel(dequantisedSample); +} - if (iTransformShift >= 0) +void Quant::lambdaAdjustColorTrans(bool forward) +{ + if (m_resetStore) { - const TCoeff offset = iTransformShift == 0 ? 0 : (1 << (iTransformShift - 1)); - reconSample = Pel((dequantisedSample + offset) >> iTransformShift); + for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++) + { + ComponentID compID = (ComponentID)component; + int delta_QP = (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + double lamdbaAdjustRate = pow(2.0, delta_QP / 3.0); + + m_lambdasStore[0][component] = m_lambdas[component]; + m_lambdasStore[1][component] = m_lambdas[component] * lamdbaAdjustRate; + } + m_resetStore = false; } - else //for very high bit depths + + if (forward) { - const int iTrShiftNeg = -iTransformShift; - reconSample = Pel(dequantisedSample << iTrShiftNeg); + CHECK(m_pairCheck == 1, "lambda has been already adjusted"); + m_pairCheck = 1; + } + else + { + CHECK(m_pairCheck == 0, "lambda has not been adjusted"); + m_pairCheck = 0; } -} - + for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++) + { + m_lambdas[component] = m_lambdasStore[m_pairCheck][component]; + } +} //! \} diff --git a/source/Lib/CommonLib/Quant.h b/source/Lib/CommonLib/Quant.h index 4877a59aa20e58887ec4f1ca797005dfbe31261e..d2ffa8bbf336cd14f2ccbaf77ccd0ca0813bf71c 100644 --- a/source/Lib/CommonLib/Quant.h +++ b/source/Lib/CommonLib/Quant.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -58,27 +58,40 @@ // ==================================================================================================================== // Class definition // ==================================================================================================================== +struct TrQuantParams +{ + int rightShift; + int qScale; +}; /// QP struct -struct QpParam +class QpParam { - int Qp; - int per; - int rem; +public: + int Qps[2]; + int pers[2]; + int rems[2]; private: QpParam(const int qpy, - const ChannelType chType, + const ComponentID compID, const int qpBdOffset, + const int minQpPrimeTsMinus4, const int chromaQPOffset, const ChromaFormat chFmt, - const int dqp ); + const int dqp + , const SPS *sps + ); public: QpParam(const TransformUnit& tu, const ComponentID &compID, const int QP = -MAX_INT); + int Qp ( const bool ts ) const { return Qps [ts?1:0]; } + int per( const bool ts ) const { return pers[ts?1:0]; } + int rem( const bool ts ) const { return rems[ts?1:0]; } + }; // END STRUCT DEFINITION QpParam /// transform and quantization class @@ -109,21 +122,23 @@ public: #endif void setLambda ( const double dLambda ) { m_dLambda = dLambda; } double getLambda () const { return m_dLambda; } + void lambdaAdjustColorTrans(bool forward); + void resetStore() { m_resetStore = true; } -#if HEVC_USE_SCALING_LISTS int* getQuantCoeff ( uint32_t list, int qp, uint32_t sizeX, uint32_t sizeY ) { return m_quantCoef [sizeX][sizeY][list][qp]; }; //!< get Quant Coefficent int* getDequantCoeff ( uint32_t list, int qp, uint32_t sizeX, uint32_t sizeY ) { return m_dequantCoef [sizeX][sizeY][list][qp]; }; //!< get DeQuant Coefficent void setUseScalingList ( bool bUseScalingList){ m_scalingListEnabledFlag = bUseScalingList; }; - bool getUseScalingList ( const uint32_t width, const uint32_t height, const bool isTransformSkip){ return m_scalingListEnabledFlag && (!isTransformSkip || ((width == 4) && (height == 
4))); }; - + bool getUseScalingList(const uint32_t width, const uint32_t height, const bool isTransformSkip, const bool lfnstApplied, const bool disableScalingMatrixForLFNSTBlks) + { + return (m_scalingListEnabledFlag && !isTransformSkip && (!lfnstApplied || !disableScalingMatrixForLFNSTBlks)); + } void setScalingListDec ( const ScalingList &scalingList); - void processScalingListEnc ( int *coeff, int *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc); - void processScalingListDec ( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc); + void processScalingListEnc ( int *coeff, int *quantcoeff, int qpMod6, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc); + void processScalingListDec ( const int *coeff, int *dequantcoeff, int qpMod6, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc); virtual void setFlatScalingList( const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths); virtual void setScalingList ( ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths); -#endif // quantization virtual void quant ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx ); @@ -147,30 +162,30 @@ protected: #if T0196_SELECTIVE_RDOQ bool m_useSelectiveRDOQ; #endif -#if HEVC_USE_SCALING_LISTS private: void xInitScalingList ( const Quant* other ); void xDestroyScalingList(); void xSetFlatScalingList( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ); - void xSetScalingListEnc ( ScalingList *scalingList, uint32_t list, uint32_t size, int qp ); - void xSetScalingListDec ( const ScalingList &scalingList, uint32_t list, uint32_t size, int qp ); -#endif -#if HEVC_USE_SIGN_HIDING + void xSetScalingListEnc(ScalingList *scalingList, uint32_t list, uint32_t size, 
int qp, uint32_t scalingListId); + void xSetScalingListDec(const ScalingList &scalingList, uint32_t list, uint32_t size, int qp, uint32_t scalingListId); + void xSetRecScalingListEnc(ScalingList *scalingList, uint32_t list, uint32_t sizew, uint32_t sizeh, int qp, uint32_t scalingListId); + void xSetRecScalingListDec(const ScalingList &scalingList, uint32_t list, uint32_t sizew, uint32_t sizeh, int qp, uint32_t scalingListId); private: void xSignBitHidingHDQ (TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* deltaU, const CoeffCodingContext& cctx, const int maxLog2TrDynamicRange); -#endif private: #if RDOQ_CHROMA_LAMBDA double m_lambdas[MAX_NUM_COMPONENT]; #endif -#if HEVC_USE_SCALING_LISTS + double m_lambdasStore[2][MAX_NUM_COMPONENT]; // 0-org; 1-act + bool m_resetStore; bool m_scalingListEnabledFlag; bool m_isScalingListOwner; int *m_quantCoef [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4 int *m_dequantCoef [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of dequantization matrix coefficient 4x4 -#endif + + int m_pairCheck; };// END CLASS DEFINITION Quant diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index b844ae48d4320200b8c2457e54fe67991d4af0c7..7b3ffe927496a28c4bf07dabc9b8116da3e8ba60 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -57,6 +57,7 @@ struct coeffGroupRDStats double d64UncodedDist; // all zero coded block distortion double d64SigCost; double d64SigCost_0; + int iNumSbbCtxBins; }; @@ -82,16 +83,12 @@ QuantRDOQ::QuantRDOQ( const Quant* other ) : Quant( other ) const QuantRDOQ *rdoq = dynamic_cast<const QuantRDOQ*>( other ); CHECK( other && !rdoq, "The RDOQ cast must be successfull!" ); -#if HEVC_USE_SCALING_LISTS xInitScalingList( rdoq ); -#endif } QuantRDOQ::~QuantRDOQ() { -#if HEVC_USE_SCALING_LISTS xDestroyScalingList(); -#endif } @@ -112,7 +109,6 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, - const int remGt2Bins, const int remRegBins, unsigned goRiceZero, uint16_t ui16AbsGoRice, @@ -150,7 +146,7 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, { double dErr = double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) ); - double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) ); + double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) ); dCurrCost += dCurrCostSig; if( dCurrCost < rd64CodedCost ) @@ -179,7 +175,6 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, - const int remGt2Bins, const int remRegBins, unsigned goRiceZero, const uint16_t ui16AbsGoRice, @@ -347,7 +342,6 @@ inline double QuantRDOQ::xGetIEPRate ( -#if HEVC_USE_SCALING_LISTS /** set quantized matrix coefficient for encode * \param scalingList quantized matrix address * \param format chroma format @@ -376,32 
+370,19 @@ void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDyna } - -#if HM_QTBT_AS_IN_JEM_QUANT -#endif -#else - -double QuantRDOQ::xGetErrScaleCoeff( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth ) +double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false) { - const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); -#if HM_QTBT_AS_IN_JEM_QUANT - double dErrScale = (double)( 1 << SCALE_BITS ); // Compensate for scaling of bitcount in Lagrange cost function - double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 ); - dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) ); // Compensate for scaling through forward transform - int QStep = ( needsSqrt2 ? ( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp] ); + const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); + double dErrScale = (double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function + double dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0); + dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift)); // Compensate for scaling through forward transform + const int QStep = g_quantScales[needsSqrt2 ? 
1 : 0][qp]; double finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1)); -#else - int errShift = SCALE_BITS - ((iTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) << 1); - double dErrScale = exp2( double( errShift ) ); - double finalErrScale = dErrScale / double( g_quantScales[qp] * g_quantScales[qp] ); -#endif return finalErrScale; } -#endif -#if HEVC_USE_SCALING_LISTS /** set error scale coefficients * \param list list ID * \param size @@ -421,12 +402,11 @@ void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY int *piQuantcoeff; double *pdErrScale; piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY ); - pdErrScale = xGetErrScaleCoeff( list, sizeX, sizeY, qp ); + pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp); -#if HM_QTBT_AS_IN_JEM_QUANT double dErrScale = (double)( 1 << SCALE_BITS ); // Compensate for scaling of bitcount in Lagrange cost function - bool needsSqrt2 = TU::needsBlockSizeTrafoScale( Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ) );// ( ( (sizeX+sizeY) & 1 ) !=0 ); + const bool needsSqrt2 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 ); dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) ); // Compensate for scaling through forward transform @@ -436,19 +416,10 @@ void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1)); } - int QStep = ( needsSqrt2 ? 
( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp] ); + int QStep = g_quantScales[needsSqrt2][qp]; xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1)); -#else - int errShift = SCALE_BITS - ((iTransformShift + DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType])) << 1); - double dErrScale = exp2( double( errShift ) ); - for( i = 0; i < uiMaxNumCoeff; i++ ) - { - pdErrScale[i] = dErrScale / double( piQuantcoeff[i] * piQuantcoeff[i] ); - } - xGetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp ) = dErrScale / double( g_quantScales[qp] * g_quantScales[qp] ); -#endif } /** set flat matrix value to quantized coefficient @@ -481,6 +452,29 @@ void QuantRDOQ::xInitScalingList( const QuantRDOQ* other ) { m_isErrScaleListOwner = other == nullptr; + size_t numElements = 0; + + for (uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) + { + for (uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) + { + for (uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) + { + for (uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) + { + numElements += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; + } + } + } + } + + if (m_isErrScaleListOwner) + { + m_errScale[0][0][0][0] = new double[numElements]; + } + + size_t offset = 0; + for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) { for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) @@ -491,7 +485,8 @@ void QuantRDOQ::xInitScalingList( const QuantRDOQ* other ) { if( m_isErrScaleListOwner ) { - m_errScale[sizeIdX][sizeIdY][listId][qp] = new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]]; + m_errScale[sizeIdX][sizeIdY][listId][qp] = m_errScale[0][0][0][0] + offset; + offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; } else { @@ -509,25 +504,8 @@ void QuantRDOQ::xDestroyScalingList() { if( 
!m_isErrScaleListOwner ) return; - for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) - { - for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) - { - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) - { - for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) - { - if(m_errScale[sizeIdX][sizeIdY][listId][qp]) - { - delete [] m_errScale[sizeIdX][sizeIdY][listId][qp]; - } - } - } - } - } -// Quant::destroyScalingList(); + delete[] m_errScale[0][0][0][0]; } -#endif void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx) @@ -539,7 +517,7 @@ void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeff const CCoeffBuf &piCoef = pSrc; CoeffBuf piQCoef = tu.getCoeffs(compID); - const bool useTransformSkip = tu.mtsIdx==1; + const bool useTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ; @@ -555,7 +533,21 @@ void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeff if (!m_useSelectiveRDOQ || xNeedRDOQ(tu, compID, piCoef, cQP)) { #endif - xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); + if( useTransformSkip ) + { + if( (tu.cu->bdpcmMode && isLuma(compID)) || (isChroma(compID) && tu.cu->bdpcmModeChroma ) ) + { + forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx ); + } + else + { + xRateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx ); + } + } + else + { + xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); + } #if T0196_SELECTIVE_RDOQ } else @@ -597,25 +589,21 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, // Represents scaling through forward transform int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); - if (tu.mtsIdx==1 && extendedPrecision) + if (tu.mtsIdx[compID] == MTS_SKIP && extendedPrecision) { iTransformShift = std::max<int>(0, 
iTransformShift); } double d64BlockUncodedCost = 0; - const uint32_t uiLog2BlockWidth = g_aucLog2[uiWidth]; -#if HEVC_USE_SCALING_LISTS - const uint32_t uiLog2BlockHeight = g_aucLog2[uiHeight]; -#endif + const uint32_t uiLog2BlockWidth = floorLog2(uiWidth); + const uint32_t uiLog2BlockHeight = floorLog2(uiHeight); const uint32_t uiMaxNumCoeff = rect.area(); CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); -#if HEVC_USE_SCALING_LISTS int scalingListType = getScalingListType(tu.cu->predMode, compID); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); -#endif const TCoeff *plSrcCoeff = pSrc.buf; TCoeff *piDstCoeff = tu.getCoeffs(compID).buf; @@ -623,75 +611,47 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, double *pdCostCoeff = m_pdCostCoeff; double *pdCostSig = m_pdCostSig; double *pdCostCoeff0 = m_pdCostCoeff0; -#if HEVC_USE_SIGN_HIDING int *rateIncUp = m_rateIncUp; int *rateIncDown = m_rateIncDown; int *sigRateDelta = m_sigRateDelta; TCoeff *deltaU = m_deltaU; -#endif memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff); memset( m_pdCostCoeff, 0, sizeof( double ) * uiMaxNumCoeff ); memset( m_pdCostSig, 0, sizeof( double ) * uiMaxNumCoeff ); -#if HEVC_USE_SIGN_HIDING memset( m_rateIncUp, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_rateIncDown, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_sigRateDelta, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_deltaU, 0, sizeof( TCoeff ) * uiMaxNumCoeff ); -#endif - - - const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits -#if HEVC_USE_SCALING_LISTS - const double *const pdErrScale = xGetErrScaleCoeff(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem); - const int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1)); - const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, 
tu.transformSkip[compID]); -#if HM_QTBT_AS_IN_JEM_QUANT - const int defaultQuantisationCoefficient = ( TU::needsSqrt2Scale( rect, tu.transformSkip[compID] ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] ); - const double defaultErrorScale = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem); -#else - const double blkErrScale = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 ); - const int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; - const double defaultErrorScale = blkErrScale * xGetErrScaleCoeffNoScalingList( scalingListType, ( uiLog2BlockWidth - 1 ), ( uiLog2BlockHeight - 1 ), cQP.rem ); -#endif -#else //HEVC_USE_SCALING_LISTS -#if HM_QTBT_AS_IN_JEM_QUANT - const int quantisationCoefficient = ( TU::needsSqrt2Scale( tu, compID ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] ); - const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth ); -#else - const double blkErrScale = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 ); - const int quantisationCoefficient = g_quantScales[cQP.rem]; - const double errorScale = blkErrScale * xGetErrScaleCoeff( uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth ); -#endif -#endif//HEVC_USE_SCALING_LISTS + const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID ); + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); + const int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight); + const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? 
tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false; + const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST); + const int defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)]; + const double defaultErrorScale = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); + const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits -#if HEVC_USE_SIGN_HIDING const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); -#endif const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; -#if HEVC_USE_SIGN_HIDING - CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag()); -#else - CoeffCodingContext cctx(tu, compID); -#endif + CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag()); const int iCGSizeM1 = (1 << cctx.log2CGSize()) - 1; int iCGLastScanPos = -1; double d64BaseCost = 0; int iLastScanPos = -1; - bool is2x2subblock = ( iCGSizeM1 == 3 ); - int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); - int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); + int ctxBinSampleRatio = (compID == COMPONENT_Y) ? 
MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + int remRegBins = (uiWidth * uiHeight * ctxBinSampleRatio) >> 4; uint32_t goRiceParam = 0; double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig; memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); - const int iCGNum = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize(); int iScanPos; coeffGroupRDStats rdStats; @@ -699,29 +659,37 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID ); #endif + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize(); for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--) { cctx.initSubblock( subSetId ); + uint32_t maxNonZeroPosInCG = iCGSizeM1; + if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) + { + maxNonZeroPosInCG = 7; + } + memset( &rdStats, 0, sizeof (coeffGroupRDStats)); - for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) + for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- ) + { + iScanPos = cctx.minSubPos() + iScanPosinCG; + uint32_t blkPos = cctx.blockPos( iScanPos ); + piDstCoeff[ blkPos ] = 0; + } + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = cctx.minSubPos() + iScanPosinCG; //===== quantization ===== uint32_t uiBlkPos = cctx.blockPos(iScanPos); // set coeff -#if HEVC_USE_SCALING_LISTS const int quantisationCoefficient = (enableScalingLists) ? 
piQCoef [uiBlkPos] : defaultQuantisationCoefficient; -#if HM_QTBT_AS_IN_JEM_QUANT const double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale; -#else - const double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] * blkErrScale : defaultErrorScale; -#endif -#endif const int64_t tmpLevel = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient; const Intermediate_Int lLevelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1))); @@ -763,9 +731,9 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, uint32_t goRiceZero = 0; if( remRegBins < 4 ) { - unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff ); + unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 ); goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ]; - goRiceZero = g_auiGoRicePosCoeff0[0][ sumAbs ]; + goRiceZero = g_auiGoRicePosCoeff0(0, goRiceParam); } const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx ); @@ -775,7 +743,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, if( iScanPos == iLastScanPos ) { uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], - lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange ); + lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange ); } else { @@ -783,54 +751,45 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], - lLevelDouble, uiMaxAbsLevel, 
&fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); -#if HEVC_USE_SIGN_HIDING + lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); -#endif } DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel ); DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) ); DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) ); -#if HEVC_USE_SIGN_HIDING deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8)); if( uiLevel > 0 ) { - int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); - rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; - rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; + int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); + rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; + rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; } else 
// uiLevel == 0 { if( remRegBins < 4 ) { - int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); - rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; + int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); + rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; } else { rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ]; } } -#endif piDstCoeff[ uiBlkPos ] = uiLevel; d64BaseCost += pdCostCoeff [ iScanPos ]; if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) ) { - remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); - remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; goRiceParam = 0; } else if( remRegBins >= 4 ) { - const uint32_t baseLevel = 4; - if( goRiceParam < 3 && ((uiLevel-baseLevel)>>1) > (3<<goRiceParam)-1 ) - { - goRiceParam++; - } + int sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4); + goRiceParam = g_auiGoRiceParsCoeff[sumAll]; remRegBins -= (uiLevel < 2 ? 
uiLevel : 3) + (iScanPos != iLastScanPos); } } @@ -901,7 +860,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0); } // reset coeffs to 0 in this block - for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = cctx.minSubPos() + iScanPosinCG; uint32_t uiBlkPos = cctx.blockPos( iScanPos ); @@ -949,7 +908,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, { bool rootCbfSoFar = false; bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID); - uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> g_aucLog2[tu.lheight()] : tu.cu->lwidth() >> g_aucLog2[tu.lwidth()]; + uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth()); if( isLastSubPartition ) { TransformUnit* tuPointer = tu.cu->firstTU; @@ -968,7 +927,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth); } } - BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, tu.depth, previousCbf, useIntraSubPartitions ) ) ); + BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) ); if( !lastCbfIsInferred ) { @@ -984,13 +943,8 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 }; int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 }; { -#if HEVC_USE_MDCS - int dim1 = ( cctx.scanType() == SCAN_VER ? uiHeight : uiWidth ); - int dim2 = ( cctx.scanType() == SCAN_VER ? 
uiWidth : uiHeight ); -#else int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth); int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight); -#endif int bitsX = 0; int bitsY = 0; int ctxId; @@ -1019,7 +973,12 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; if (cctx.isSigGroup( iCGScanPos ) ) { - for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--) + uint32_t maxNonZeroPosInCG = iCGSizeM1; + if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) + { + maxNonZeroPosInCG = 7; + } + for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG; @@ -1033,11 +992,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, { uint32_t uiPosY = uiBlkPos >> uiLog2BlockWidth; uint32_t uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth ); -#if HEVC_USE_MDCS - double d64CostLast = ( cctx.scanType() == SCAN_VER ? 
xGetRateLast( lastBitsX, lastBitsY, uiPosY, uiPosX ) : xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY ) ); -#else double d64CostLast = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY ); -#endif double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ]; @@ -1085,16 +1040,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, piDstCoeff[ cctx.blockPos( scanPos ) ] = 0; } -#if HEVC_USE_SIGN_HIDING if( cctx.signHiding() && uiAbsSum>=2) { - const double inverseQuantScale = double(g_invQuantScales[cQP.rem]); - int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per)) / m_dLambda / 16 - / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth))) -#if HM_QTBT_AS_IN_JEM_QUANT -#else - * blkErrScale -#endif + const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]); + int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16 + / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth))) + 0.5); int lastCG = -1; @@ -1220,7 +1170,757 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, } } } -#endif +} + +void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ) +{ + const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); + + const SPS &sps = *tu.cs->sps; + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; + const ChannelType chType = toChannelType(compID); + const int channelBitDepth = sps.getBitDepth( chType ); + + const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); + const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); + + int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange ); + + if( 
extendedPrecision ) + { + transformShift = std::max<int>( 0, transformShift ); + } + + double blockUncodedCost = 0; + const uint32_t maxNumCoeff = rect.area(); + + CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" ); + + int scalingListType = getScalingListType( tu.cu->predMode, compID ); + CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" ); + + const TCoeff *srcCoeff = coeffs.buf; + TCoeff *dstCoeff = tu.getCoeffs( compID ).buf; + + double *costCoeff = m_pdCostCoeff; + double *costSig = m_pdCostSig; + double *costCoeff0 = m_pdCostCoeff0; + + memset( m_pdCostCoeff, 0, sizeof( double ) * maxNumCoeff ); + memset( m_pdCostSig, 0, sizeof( double ) * maxNumCoeff ); + + m_bdpcm = 0; + + const bool needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID ); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? 
-1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits + const int quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)]; + const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); + + const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; + + uint32_t coeffLevels[3]; + double coeffLevelError[4]; + + CoeffCodingContext cctx( tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag() ); + const int sbSizeM1 = ( 1 << cctx.log2CGSize() ) - 1; + double baseCost = 0; + uint32_t goRiceParam = 0; + + double *costSigSubBlock = m_pdCostCoeffGroupSig; + memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); + + const int sbNum = width * height >> cctx.log2CGSize(); + int scanPos; + coeffGroupRDStats rdStats; + + bool anySigCG = false; + + int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; + cctx.setNumCtxBins(maxCtxBins); + + for( int sbId = 0; sbId < sbNum; sbId++ ) + { + cctx.initSubblock( sbId ); + + int noCoeffCoded = 0; + baseCost = 0.0; + memset( &rdStats, 0, sizeof (coeffGroupRDStats)); + + rdStats.iNumSbbCtxBins = 0; + + for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) + { + int lastPosCoded = sbSizeM1; + scanPos = cctx.minSubPos() + scanPosInSB; + //===== quantization ===== + uint32_t blkPos = cctx.blockPos( scanPos ); + + // set coeff + const int64_t tmpLevel = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient; + const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) ); + + uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); + uint32_t minAbsLevel = (roundAbsLevel > 1 ? 
roundAbsLevel - 1 : 1); + + uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits)); + uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1); + + m_testedLevels = 0; + coeffLevels[m_testedLevels++] = roundAbsLevel; + + if (minAbsLevel != roundAbsLevel) + coeffLevels[m_testedLevels++] = minAbsLevel; + + int rightPixel, belowPixel, predPixel; + + cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); + predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0); + + if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1) + coeffLevels[m_testedLevels++] = upAbsLevel; + + double dErr = double(levelDouble); + coeffLevelError[0] = dErr * dErr * errorScale; + + costCoeff0[scanPos] = coeffLevelError[0]; + blockUncodedCost += costCoeff0[ scanPos ]; + dstCoeff[blkPos] = coeffLevels[0]; + + //===== coefficient level estimation ===== + unsigned ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff ); + uint32_t cLevel; + const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() ); + + goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff ); + unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0); + const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); + const uint8_t sign = srcCoeff[ blkPos ] < 0 ? 
1 : 0; + + DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); + + unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0); + const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); + + const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); + bool lastCoeff = false; // + if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) + { + lastCoeff = true; + } + int numUsedCtxBins = 0; + cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, + &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins); + + cctx.decimateNumCtxBins(numUsedCtxBins); + rdStats.iNumSbbCtxBins += numUsedCtxBins; + + + if (cLevel > 0) + { + noCoeffCoded++; + } + + TCoeff level = cLevel; + dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? 
-level : level; + baseCost += costCoeff[ scanPos ]; + rdStats.d64SigCost += costSig[ scanPos ]; + + if( dstCoeff[ blkPos ] ) + { + cctx.setSigGroup(); + rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ]; + rdStats.d64UncodedDist += costCoeff0[ scanPos ]; + } + } //end for (iScanPosinCG) + + if( !cctx.isSigGroup() ) + { + const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); + baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost; + costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); + cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block + } + else if( sbId != sbNum - 1 || anySigCG ) + { + // rd-cost if SigCoeffGroupFlag = 0, initialization + double costZeroSB = baseCost; + + const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); + + baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); + costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); + costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); + + costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels + costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels + costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels + + if( costZeroSB < baseCost ) + { + cctx.resetSigGroup(); + baseCost = costZeroSB; + costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); + cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block + for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) + { + scanPos = cctx.minSubPos() + scanPosInSB; + uint32_t blkPos = cctx.blockPos( scanPos ); + + if( dstCoeff[ blkPos ] ) + { + dstCoeff[ blkPos ] = 0; + costCoeff[ scanPos ] = costCoeff0[ scanPos ]; + costSig[ scanPos] = 0; + } + } + } + else + { + 
anySigCG = true; + } + } + } + + //===== estimate last position ===== + for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ ) + { + int blkPos = cctx.blockPos( scanPos ); + TCoeff level = dstCoeff[ blkPos ]; + absSum += abs(level); + } +} + +void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ) +{ + const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); + + const SPS &sps = *tu.cs->sps; + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; + const ChannelType chType = toChannelType(compID); + const int channelBitDepth = sps.getBitDepth(chType); + + const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); + const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); + const int dirMode = isLuma(compID) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma; + int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); + + if (extendedPrecision) + { + transformShift = std::max<int>(0, transformShift); + } + + double blockUncodedCost = 0; + const uint32_t maxNumCoeff = rect.area(); + + CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); + + int scalingListType = getScalingListType(tu.cu->predMode, compID); + CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); + + const TCoeff *srcCoeff = coeffs.buf; + TCoeff *dstCoeff = tu.getCoeffs(compID).buf; + + double *costCoeff = m_pdCostCoeff; + double *costSig = m_pdCostSig; + double *costCoeff0 = m_pdCostCoeff0; + + memset(m_pdCostCoeff, 0, sizeof(double) * maxNumCoeff); + memset(m_pdCostSig, 0, sizeof(double) * maxNumCoeff); + memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff); + + m_bdpcm = dirMode; + + const bool needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. 
+ const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits + const int quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; + const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); + TrQuantParams trQuantParams; + trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip))); + trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; + + const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; + + uint32_t coeffLevels[3]; + double coeffLevelError[4]; + + CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag()); + const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1; + double baseCost = 0; + uint32_t goRiceParam = 0; + + double *costSigSubBlock = m_pdCostCoeffGroupSig; + memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double)); + + const int sbNum = width * height >> cctx.log2CGSize(); + int scanPos; + coeffGroupRDStats rdStats; + + bool anySigCG = false; + + int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; + cctx.setNumCtxBins(maxCtxBins); + + for (int sbId = 0; sbId < sbNum; sbId++) + { + cctx.initSubblock(sbId); + + int noCoeffCoded = 0; + baseCost = 0.0; + memset(&rdStats, 0, sizeof(coeffGroupRDStats)); + rdStats.iNumSbbCtxBins = 0; + + for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) + { + int lastPosCoded = sbSizeM1; + scanPos = cctx.minSubPos() + scanPosInSB; + //===== quantization ===== + uint32_t blkPos = cctx.blockPos(scanPos); + + const int posX = cctx.posX(scanPos); + const int posY = cctx.posY(scanPos); + const int posS = (1 == 
dirMode) ? posX : posY; + const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; + TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0; + + // set coeff + const int64_t tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient; + const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1))); + uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); + uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); + + m_testedLevels = 0; + coeffLevels[m_testedLevels++] = roundAbsLevel; + + if (minAbsLevel != roundAbsLevel) + coeffLevels[m_testedLevels++] = minAbsLevel; + + double dErr = double(levelDouble); + coeffLevelError[0] = dErr * dErr * errorScale; + + costCoeff0[scanPos] = coeffLevelError[0]; + blockUncodedCost += costCoeff0[scanPos]; + dstCoeff[blkPos] = coeffLevels[0]; + + //===== coefficient level estimation ===== + unsigned ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff); + uint32_t cLevel; + const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS()); + + goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff); + unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode); + const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); + const uint8_t sign = srcCoeff[blkPos] - predCoeff < 0 ? 
1 : 0; + unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode); + const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); + + DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig); + + const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig); + bool lastCoeff = false; // + if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) + { + lastCoeff = true; + } + int rightPixel, belowPixel; + cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); + int numUsedCtxBins = 0; + cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, + &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins); + cctx.decimateNumCtxBins(numUsedCtxBins); + rdStats.iNumSbbCtxBins += numUsedCtxBins; + if (cLevel > 0) + { + noCoeffCoded++; + } + dstCoeff[blkPos] = cLevel; + + if (sign) + { + dstCoeff[blkPos] = -dstCoeff[blkPos]; + } + xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams ); + m_fullCoeff[blkPos] += predCoeff; + + baseCost += costCoeff[scanPos]; + rdStats.d64SigCost += costSig[scanPos]; + + if (dstCoeff[blkPos]) + { + cctx.setSigGroup(); + rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos]; + rdStats.d64UncodedDist += costCoeff0[scanPos]; + } + } //end for (iScanPosinCG) + + if (!cctx.isSigGroup()) + { + const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); + baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost; + costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); + cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block + } + else if (sbId != sbNum - 1 || anySigCG) + { + // rd-cost if SigCoeffGroupFlag = 0, initialization + double costZeroSB = baseCost; + + const 
BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); + + baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1); + costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0); + costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1); + + costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels + costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels + costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels + + if (costZeroSB < baseCost) + { + cctx.resetSigGroup(); + baseCost = costZeroSB; + costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); + cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block + + for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) + { + scanPos = cctx.minSubPos() + scanPosInSB; + uint32_t blkPos = cctx.blockPos(scanPos); + + const int posX = cctx.posX(scanPos); + const int posY = cctx.posY(scanPos); + const int posS = (1 == dirMode) ? posX : posY; + const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; + /* zeroed residual: reconstruction equals its predictor; m_fullCoeff is addressed by block position, like posNb */ m_fullCoeff[blkPos] = (0 != posS) ? 
m_fullCoeff[posNb] : 0; + + if (dstCoeff[blkPos]) + { + dstCoeff[blkPos] = 0; + costCoeff[scanPos] = costCoeff0[scanPos]; + costSig[scanPos] = 0; + } + } + } + else + { + anySigCG = true; + } + } + } + + //===== estimate last position ===== + for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++) + { + int blkPos = cctx.blockPos(scanPos); + TCoeff level = dstCoeff[blkPos]; + absSum += abs(level); + } +} + +void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams) +{ + // xDequant + if (trQuantParams.rightShift > 0) + { + const Intermediate_Int qAdd = Intermediate_Int(1) << (trQuantParams.rightShift - 1); + pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale + qAdd) >> trQuantParams.rightShift); + } + else + { + pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale) << -trQuantParams.rightShift); + } +} + +inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double& rd64CodedCost, + double& rd64CodedCost0, + double& rd64CodedCostSig, + Intermediate_Int levelDouble, + int qBits, + double errorScale, + uint32_t coeffLevels[], + double coeffLevelError[], + const BinFracBits* fracBitsSig, + const BinFracBits& fracBitsPar, + CoeffCodingContext& cctx, + const FracBitsAccess& fracBitsAccess, + const BinFracBits& fracBitsSign, + const BinFracBits& fracBitsGt1, + const uint8_t sign, + int rightPixel, + int belowPixel, + uint16_t ricePar, + bool isLast, + bool useLimitedPrefixLength, + const int maxLog2TrDynamicRange + , int& numUsedCtxBins +) const +{ + double currCostSig = 0; + uint32_t bestAbsLevel = 0; + numUsedCtxBins = 0; + int numBestCtxBin = 0; + if (!isLast && coeffLevels[0] < 3) + { + if (cctx.numCtxBins() >= 4) + rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0); + else + rd64CodedCostSig = xGetICost(1 << SCALE_BITS); + rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; + if (cctx.numCtxBins() >= 4) + numUsedCtxBins++; + if (coeffLevels[0] == 0) + { + return bestAbsLevel; + } + } + else + { + rd64CodedCost = 
MAX_DOUBLE; + } + + if (!isLast) + { + if (cctx.numCtxBins() >= 4) + currCostSig = xGetRateSigCoef(*fracBitsSig, 1); + else + currCostSig = xGetICost(1 << SCALE_BITS); + if (coeffLevels[0] >= 3 && cctx.numCtxBins() >= 4) + numUsedCtxBins++; + } + + for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++) + { + int absLevel = coeffLevels[errorInd - 1]; + double dErr = 0.0; + dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits)); + coeffLevelError[errorInd] = dErr * dErr * errorScale; + int modAbsLevel = absLevel; + if (cctx.numCtxBins() >= 4) + { + modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm); + } + int numCtxBins = 0; + double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange)); + + if (cctx.numCtxBins() >= 4) + dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more + + if (dCurrCost < rd64CodedCost) + { + bestAbsLevel = absLevel; + rd64CodedCost = dCurrCost; + rd64CodedCostSig = currCostSig; + numBestCtxBin = numCtxBins; + } + } + numUsedCtxBins += numBestCtxBin; + return bestAbsLevel; +} + +inline int QuantRDOQ::xGetICRateTS( const uint32_t absLevel, + const BinFracBits& fracBitsPar, + const CoeffCodingContext& cctx, + const FracBitsAccess& fracBitsAccess, + const BinFracBits& fracBitsSign, + const BinFracBits& fracBitsGt1, + int& numCtxBins, + const uint8_t sign, + const uint16_t ricePar, + const bool useLimitedPrefixLength, + const int maxLog2TrDynamicRange ) const +{ + + + if (cctx.numCtxBins() < 4) // Full by-pass coding + { + int rate = absLevel ? 
(1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero + + uint32_t symbol = absLevel; + + uint32_t length; + const int threshold = COEF_REMAIN_BIN_REDUCTION; + if (symbol < (threshold << ricePar)) + { + length = symbol >> ricePar; + rate += (length + 1 + ricePar) << SCALE_BITS; + } + else if (useLimitedPrefixLength) + { + const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange)); + + uint32_t prefixLength = 0; + uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION; + + while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2))) + { + prefixLength++; + } + + const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/); + + rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS; + } + else + { + length = ricePar; + symbol = symbol - (threshold << ricePar); + while (symbol >= (1 << length)) + { + symbol -= (1 << (length++)); + } + rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; + } + + return rate; + } + + else if (cctx.numCtxBins() >= 4 && cctx.numCtxBins() < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here) + { + int rate = fracBitsSign.intBits[sign]; // sign bits + if (absLevel) + numCtxBins++; + + if (absLevel > 1) + { + rate += fracBitsGt1.intBits[1]; + rate += fracBitsPar.intBits[(absLevel - 2) & 1]; + + numCtxBins += 2; + + int cutoffVal = 2; + + if (absLevel >= cutoffVal) + { + uint32_t symbol = (absLevel - cutoffVal) >> 1; + uint32_t length; + const int threshold = COEF_REMAIN_BIN_REDUCTION; + if (symbol < (threshold << ricePar)) + { + length = symbol >> ricePar; + rate += (length + 1 + ricePar) << SCALE_BITS; + } + else if (useLimitedPrefixLength) + { + const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange)); + + uint32_t prefixLength = 0; + uint32_t suffix = (symbol >> 
ricePar) - COEF_REMAIN_BIN_REDUCTION; + + while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2))) + { + prefixLength++; + } + + const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/); + + rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS; + } + else + { + length = ricePar; + symbol = symbol - (threshold << ricePar); + while (symbol >= (1 << length)) + { + symbol -= (1 << (length++)); + } + rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; + } + } + } + else if (absLevel == 1) + { + rate += fracBitsGt1.intBits[0]; + numCtxBins++; + } + else + { + rate = 0; + } + return rate; + + } + + + + int rate = fracBitsSign.intBits[sign]; + + if (absLevel) + numCtxBins++; + + if( absLevel > 1 ) + { + rate += fracBitsGt1.intBits[1]; + rate += fracBitsPar.intBits[( absLevel - 2 ) & 1]; + numCtxBins += 2; + + int cutoffVal = 2; + const int numGtBins = 4; + for( int i = 0; i < numGtBins; i++ ) + { + if( absLevel >= cutoffVal ) + { + const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 ); + const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX ); + unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) ); + rate += fracBitsGtX.intBits[gtX]; + numCtxBins++; + } + cutoffVal += 2; + } + + if( absLevel >= cutoffVal ) + { + uint32_t symbol = ( absLevel - cutoffVal ) >> 1; + uint32_t length; + const int threshold = COEF_REMAIN_BIN_REDUCTION; + if( symbol < ( threshold << ricePar ) ) + { + length = symbol >> ricePar; + rate += ( length + 1 + ricePar ) << SCALE_BITS; + } + else if( useLimitedPrefixLength ) + { + const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) ); + + uint32_t prefixLength = 0; + uint32_t suffix = ( symbol >> ricePar ) - COEF_REMAIN_BIN_REDUCTION; + + while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) 
) + { + prefixLength++; + } + + const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ricePar ) : ( prefixLength + 1/*separator*/ ); + + rate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar ) << SCALE_BITS; + } + else + { + length = ricePar; + symbol = symbol - ( threshold << ricePar ); + while( symbol >= ( 1 << length ) ) + { + symbol -= ( 1 << ( length++ ) ); + } + rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS; + } + } + } + else if( absLevel == 1 ) + { + rate += fracBitsGt1.intBits[0]; + numCtxBins++; + } + else + { + rate = 0; + } + return rate; } //! \} diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h index a51bdca698c3c03bada83ab39ef21562d4a4609c..ec3ca1c6db1b570841e8fa3e779e886118365a76 100644 --- a/source/Lib/CommonLib/QuantRDOQ.h +++ b/source/Lib/CommonLib/QuantRDOQ.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -61,24 +61,20 @@ public: ~QuantRDOQ(); public: -#if HEVC_USE_SCALING_LISTS void setFlatScalingList ( const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths ); void setScalingList ( ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths); -#endif // quantization void quant ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx ); + void forwardRDPCM ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx ); private: -#if HEVC_USE_SCALING_LISTS - double* xGetErrScaleCoeff ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale [sizeX][sizeY][list][qp]; }; //!< get Error Scale Coefficent + double* xGetErrScaleCoeffSL ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale[sizeX][sizeY][list][qp]; }; //!< get Error Scale Coefficent + double xGetErrScaleCoeff ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip); double& xGetErrScaleCoeffNoScalingList ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScaleNoScalingList[sizeX][sizeY][list][qp]; }; //!< get Error Scale Coefficent void xInitScalingList ( const QuantRDOQ* other ); void xDestroyScalingList (); void xSetErrScaleCoeff ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths ); -#else - double xGetErrScaleCoeff ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth); -#endif - + void xDequantSample ( TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams ); // RDOQ functions void xRateDistOptQuant(TransformUnit 
&tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx); @@ -91,7 +87,6 @@ private: const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, - const int remGt2Bins, const int remRegBins, unsigned goRiceZero, uint16_t ui16AbsGoRice, @@ -104,7 +99,6 @@ private: const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, - const int remGt2Bins, const int remRegBins, unsigned goRiceZero, const uint16_t ui16AbsGoRice, @@ -120,24 +114,60 @@ private: inline double xGetICost ( double dRate ) const; inline double xGetIEPRate ( ) const; + void xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ); + + inline uint32_t xGetCodedLevelTSPred(double& rd64CodedCost, + double& rd64CodedCost0, + double& rd64CodedCostSig, + Intermediate_Int levelDouble, + int qBits, + double errorScale, + uint32_t coeffLevels[], + double coeffLevelError[], + const BinFracBits* fracBitsSig, + const BinFracBits& fracBitsPar, + CoeffCodingContext& cctx, + const FracBitsAccess& fracBitsAccess, + const BinFracBits& fracBitsSign, + const BinFracBits& fracBitsGt1, + const uint8_t sign, + int rightPixel, + int belowPixel, + uint16_t ricePar, + bool isLast, + bool useLimitedPrefixLength, + const int maxLog2TrDynamicRange + , int& numUsedCtxBins + ) const; + + inline int xGetICRateTS ( const uint32_t absLevel, + const BinFracBits& fracBitsPar, + const CoeffCodingContext& cctx, + const FracBitsAccess& fracBitsAccess, + const BinFracBits& fracBitsSign, + const BinFracBits& fracBitsGt1, + int& numCtxBins, + const uint8_t sign, + const uint16_t ricePar, + const bool useLimitedPrefixLength, + const int maxLog2TrDynamicRange ) const; private: -#if HEVC_USE_SCALING_LISTS bool m_isErrScaleListOwner; double *m_errScale [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; 
///< array of quantization matrix coefficient 4x4 double m_errScaleNoScalingList[SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4 -#endif // temporary buffers for RDOQ double m_pdCostCoeff [MAX_TB_SIZEY * MAX_TB_SIZEY]; double m_pdCostSig [MAX_TB_SIZEY * MAX_TB_SIZEY]; double m_pdCostCoeff0 [MAX_TB_SIZEY * MAX_TB_SIZEY]; double m_pdCostCoeffGroupSig[(MAX_TB_SIZEY * MAX_TB_SIZEY) >> MLS_CG_SIZE]; // even if CG size is 2 (if one of the sides is 2) instead of 4, there should be enough space -#if HEVC_USE_SIGN_HIDING int m_rateIncUp [MAX_TB_SIZEY * MAX_TB_SIZEY]; int m_rateIncDown [MAX_TB_SIZEY * MAX_TB_SIZEY]; int m_sigRateDelta [MAX_TB_SIZEY * MAX_TB_SIZEY]; TCoeff m_deltaU [MAX_TB_SIZEY * MAX_TB_SIZEY]; -#endif + TCoeff m_fullCoeff [MAX_TB_SIZEY * MAX_TB_SIZEY]; + int m_bdpcm; + int m_testedLevels; };// END CLASS DEFINITION QuantRDOQ //! \} diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 5c753e9537b20670e0eaa897f6c0c4e785ada977..7f48f5d5ea9fca804b4e8de7f230cb99dec5add3 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -65,7 +65,12 @@ double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion, bool useUna double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion ) #endif { - +#if JVET_AHG14_LOSSLESS + if( m_costMode == COST_LOSSLESS_CODING && 0 != distortion ) + { + return MAX_DOUBLE; + } +#endif #if WCG_EXT return ( useUnadjustedLambda ? 
m_DistScaleUnadjusted : m_DistScale ) * double( distortion ) + double( fracBits ); #else @@ -77,16 +82,46 @@ void RdCost::setLambda( double dLambda, const BitDepths &bitDepths ) { m_dLambda = dLambda; m_DistScale = double(1<<SCALE_BITS) / m_dLambda; - m_dLambdaMotionSAD[0] = sqrt(m_dLambda); - dLambda = 0.57 - * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - + 6 - * ((bitDepths.recon[CHANNEL_TYPE_LUMA] - 8) - - DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[CHANNEL_TYPE_LUMA]))) - / 3.0)); - m_dLambdaMotionSAD[1] = sqrt(dLambda); + m_dLambdaMotionSAD = sqrt(m_dLambda); } +void RdCost::lambdaAdjustColorTrans(bool forward, ComponentID componentID) +{ + if (m_resetStore) + { + for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++) + { + ComponentID compID = (ComponentID)component; + int delta_QP = (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + double lamdbaAdjustRate = pow(2.0, delta_QP / 3.0); + + m_lambdaStore[0][component] = m_dLambda; + m_DistScaleStore[0][component] = m_DistScale; + + m_lambdaStore[1][component] = m_dLambda * lamdbaAdjustRate; + m_DistScaleStore[1][component] = double(1 << SCALE_BITS) / m_lambdaStore[1][component]; + } + m_resetStore = false; + } + + if (forward) + { + CHECK(m_pairCheck == 1, "lambda has been already adjusted"); + m_pairCheck = 1; + } + else + { + CHECK(m_pairCheck == 0, "lambda has not been adjusted"); + m_pairCheck = 0; + } + + m_dLambda = m_lambdaStore[m_pairCheck][componentID]; + m_DistScale = m_DistScaleStore[m_pairCheck][componentID]; + if (m_pairCheck == 0) + { + CHECK(m_DistScale != m_DistScaleUnadjusted, "lambda should be adjusted to the original value"); + } +} // Initialize Function Pointer by [eDFunc] void RdCost::init() @@ -176,6 +211,8 @@ void RdCost::init() m_motionLambda = 0; m_iCostScale = 0; + m_resetStore = true; + m_pairCheck = 0; } @@ -190,7 +227,7 @@ void RdCost::copyState( const RdCost& other ) m_mvPredictor = other.m_mvPredictor; m_motionLambda = 
other.m_motionLambda; m_iCostScale = other.m_iCostScale; - memcpy( m_dLambdaMotionSAD, other.m_dLambdaMotionSAD, sizeof( m_dLambdaMotionSAD ) ); + m_dLambdaMotionSAD = other.m_dLambdaMotionSAD; #if WCG_EXT m_dLambda_unadjusted = other.m_dLambda_unadjusted ; m_DistScaleUnadjusted = other.m_DistScaleUnadjusted; @@ -232,7 +269,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRef } else if( isPowerOf2( org.width ) ) { - rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ]; + rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + floorLog2( org.width ) ]; } else { @@ -241,7 +278,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRef } else if( isPowerOf2( org.width ) ) { - rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ]; + rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + floorLog2( org.width ) ]; } else { @@ -306,7 +343,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c } else if( isPowerOf2( org.width) ) { - rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ]; + rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + floorLog2( org.width ) ]; } else { @@ -315,7 +352,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c } else { - rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ]; + rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + floorLog2( org.width ) ]; } rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); @@ -359,7 +396,7 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, } else { - rcDP.distFunc = m_afpDistortFunc[ DF_SAD + g_aucLog2[ width ] ]; + rcDP.distFunc = m_afpDistortFunc[ DF_SAD + floorLog2( width ) ]; } } @@ -380,6 +417,8 @@ Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, int bitD #if WCG_EXT if( orgLuma ) { + cDtParam.cShiftX = 
getComponentScaleX(compID, m_cf); + cDtParam.cShiftY = getComponentScaleY(compID, m_cf); if( isChroma(compID) ) { cDtParam.orgLuma = *orgLuma; @@ -393,7 +432,7 @@ Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, int bitD if( isPowerOf2( org.width ) ) { - cDtParam.distFunc = m_afpDistortFunc[eDFunc + g_aucLog2[org.width]]; + cDtParam.distFunc = m_afpDistortFunc[eDFunc + floorLog2(org.width)]; } else { @@ -2936,7 +2975,7 @@ void RdCost::restoreReshapeLumaLevelToWeightTable() void RdCost::updateReshapeLumaLevelToWeightTable(SliceReshapeInfo &sliceReshape, Pel *wtTable, double cwt) { - if (m_signalType == RESHAPE_SIGNAL_SDR) + if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG) { if (sliceReshape.getSliceReshapeModelPresentFlag()) { @@ -2989,7 +3028,7 @@ Distortion RdCost::getWeightedMSE(int compIdx, const Pel org, const Pel cur, con } // use luma to get weight double weight = 1.0; - if (m_signalType == RESHAPE_SIGNAL_SDR) + if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG) { if (compIdx == COMPONENT_Y) { @@ -3024,7 +3063,8 @@ Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const int iStrideOrgLuma = rcDtParam.orgLuma.stride; - const int cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; @@ -3036,7 +3076,8 @@ Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam ) } piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3056,7 +3097,9 @@ Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; + Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) @@ -3065,7 +3108,7 @@ Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift]); // piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3085,7 +3128,9 @@ Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; + Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) @@ -3096,7 +3141,7 @@ Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift] ); // piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3116,7 +3161,8 @@ Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; @@ -3132,7 +3178,7 @@ Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7 ], piCur[7 ], uiShift, piOrgLuma[size_t(7)<<cShift ]); // piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3151,8 +3197,8 @@ Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY - + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) @@ -3175,7 +3221,8 @@ Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[15 ], piCur[15 ], uiShift, piOrgLuma[size_t(15)<<cShift ]); //piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3194,7 +3241,8 @@ Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) @@ -3220,7 +3268,7 @@ Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam ) } piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3239,7 +3287,8 @@ Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; @@ -3279,7 +3328,7 @@ Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], uiShift, piOrgLuma[size_t(31)<<cShift ]); // iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } @@ -3298,7 +3347,8 @@ Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam ) const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; - const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY + const size_t cShift = rcDtParam.cShiftX; + const size_t cShiftY = rcDtParam.cShiftY; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT((rcDtParam.bitDepth)) << 1; @@ -3370,7 +3420,8 @@ Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam ) uiSum += getWeightedMSE(rcDtParam.compID, piOrg[63], piCur[63], uiShift, piOrgLuma[size_t(63)<<cShift]); // iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; - piOrgLuma += iStrideOrgLuma<<cShift; + + piOrgLuma += iStrideOrgLuma<<cShiftY; } return ( uiSum ); } diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 64d2e64595e9ec84e66bf711c55c558a121a5498..a7aef6fa7700fbe9cac8236d0317687c167ff53a 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this 
license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -87,10 +87,11 @@ public: // (vertical) subsampling shift (for reducing complexity) // - 0 = no subsampling, 1 = even rows, 2 = every 4th, etc. int subShift; - + int cShiftX; + int cShiftY; DistParam() : org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr ), compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 ) - + , cShiftX(-1), cShiftY(-1) { } }; @@ -112,9 +113,14 @@ private: static uint32_t m_signalType; static double m_chromaWeight; static int m_lumaBD; + ChromaFormat m_cf; #endif double m_DistScale; - double m_dLambdaMotionSAD[2 /* 0=standard, 1=for transquant bypass when mixed-lossless cost evaluation enabled*/]; + double m_dLambdaMotionSAD; + double m_lambdaStore[2][3]; // 0-org; 1-act + double m_DistScaleStore[2][3]; // 0-org; 1-act + bool m_resetStore; + int m_pairCheck; // for motion cost Mv m_mvPredictor; @@ -128,6 +134,7 @@ public: virtual ~RdCost(); #if WCG_EXT + void setChromaFormat ( const ChromaFormat & _cf) { m_cf = _cf; } double calcRdCost ( uint64_t fracBits, Distortion distortion, bool useUnadjustedLambda = true ); #else double calcRdCost ( uint64_t fracBits, Distortion distortion ); @@ -143,6 +150,9 @@ public: double getLambda() { return m_dLambda; } #endif double getChromaWeight() { return ((m_distortionWeight[COMPONENT_Cb] + m_distortionWeight[COMPONENT_Cr]) / 2.0); } +#if RDOQ_CHROMA_LAMBDA + double getDistortionWeight ( const ComponentID compID ) const { return m_distortionWeight[compID % MAX_NUM_COMPONENT]; } +#endif void setCostMode(CostMode m) { m_costMode = m; } @@ -158,8 +168,8 @@ public: void setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false ); void 
setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false ); - double getMotionLambda ( bool bIsTransquantBypass ) { return m_dLambdaMotionSAD[(bIsTransquantBypass && m_costMode==COST_MIXED_LOSSLESS_LOSSY_CODING)?1:0]; } - void selectMotionLambda ( bool bIsTransquantBypass ) { m_motionLambda = getMotionLambda( bIsTransquantBypass ); } + double getMotionLambda ( ) { return m_dLambdaMotionSAD; } + void selectMotionLambda ( ) { m_motionLambda = getMotionLambda( ); } void setPredictor ( const Mv& rcMv ) { m_mvPredictor = rcMv; @@ -167,7 +177,7 @@ public: void setCostScale ( int iCostScale ) { m_iCostScale = iCostScale; } Distortion getCost ( uint32_t b ) { return Distortion( m_motionLambda * b ); } // for ibc - void getMotionCost(int add, bool isTransquantBypass) { m_dCost = m_dLambdaMotionSAD[(isTransquantBypass && m_costMode == COST_MIXED_LOSSLESS_LOSSY_CODING) ? 
1 : 0] + add; } + void getMotionCost(int add) { m_dCost = m_dLambdaMotionSAD + add; } void setPredictors(Mv* pcMv) { @@ -179,7 +189,7 @@ public: inline Distortion getBvCostMultiplePreds(int x, int y, bool useIMV) { - return Distortion((m_dCost * getBitsMultiplePreds(x, y, useIMV)) / 65536.0); + return Distortion(m_dCost * getBitsMultiplePreds(x, y, useIMV)); } unsigned int getBitsMultiplePreds(int x, int y, bool useIMV) @@ -286,7 +296,7 @@ public: uiTemp2 >>= MAX_CU_DEPTH; } - return uiLength2 + ( g_aucPrevLog2[uiTemp2] << 1 ); + return uiLength2 + ( floorLog2(uiTemp2) << 1 ); } Distortion getCostOfVectorWithPredictor( const int x, const int y, const unsigned imvShift ) { return Distortion( m_motionLambda * getBitsOfVectorWithPredictor(x, y, imvShift )); } uint32_t getBitsOfVectorWithPredictor( const int x, const int y, const unsigned imvShift ) { return xGetExpGolombNumberOfBits(((x << m_iCostScale) - m_mvPredictor.getHor())>>imvShift) + xGetExpGolombNumberOfBits(((y << m_iCostScale) - m_mvPredictor.getVer())>>imvShift); } @@ -303,6 +313,9 @@ public: inline std::vector<double>& getLumaLevelWeightTable () { return m_lumaLevelToWeightPLUT; } #endif + void lambdaAdjustColorTrans(bool forward, ComponentID compID); + void resetStore() { m_resetStore = true; } + private: static Distortion xGetSSE ( const DistParam& pcDtParam ); @@ -362,19 +375,19 @@ private: static Distortion xCalcHADs8x4 ( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur ); #ifdef TARGET_SIMD_X86 - template< typename Torg, typename Tcur, X86_VEXT vext > + template<X86_VEXT vext> static Distortion xGetSSE_SIMD ( const DistParam& pcDtParam ); - template< typename Torg, typename Tcur, int iWidth, X86_VEXT vext > + template<int iWidth, X86_VEXT vext> static Distortion xGetSSE_NxN_SIMD( const DistParam& pcDtParam ); - template< X86_VEXT vext > + template<X86_VEXT vext> static Distortion xGetSAD_SIMD ( const DistParam& pcDtParam ); - template< int iWidth, X86_VEXT vext > + template<int 
iWidth, X86_VEXT vext> static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam ); - template< X86_VEXT vext > - static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam); + template<X86_VEXT vext> + static Distortion xGetSAD_IBD_SIMD( const DistParam& pcDtParam ); - template< typename Torg, typename Tcur, X86_VEXT vext > + template<X86_VEXT vext> static Distortion xGetHADs_SIMD ( const DistParam& pcDtParam ); #endif diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.cpp b/source/Lib/CommonLib/RdCostWeightPrediction.cpp index 3a4c4f2652ab1750026322f9564119e9471977a7..f88f665c0cf3074255305c6931c0032d8c4e2d95 100644 --- a/source/Lib/CommonLib/RdCostWeightPrediction.cpp +++ b/source/Lib/CommonLib/RdCostWeightPrediction.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.h b/source/Lib/CommonLib/RdCostWeightPrediction.h index 9687c3cef1b0d74fdf504410f5dd171db58f20fb..cf7d55e6f2fb26f2df631611d1e0b3417be50268 100644 --- a/source/Lib/CommonLib/RdCostWeightPrediction.h +++ b/source/Lib/CommonLib/RdCostWeightPrediction.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/Reshape.cpp b/source/Lib/CommonLib/Reshape.cpp index 85f06103e04aba3cdd177062214aed9fc4e2b613..4d6ac24644cca6d38c6a471c10d8c092ebb6a3fb 100644 --- a/source/Lib/CommonLib/Reshape.cpp +++ b/source/Lib/CommonLib/Reshape.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ #include <stdio.h> #include <string.h> #include <math.h> +#include <UnitTools.h> //! \ingroup CommonLib //! \{ @@ -46,10 +47,13 @@ // ==================================================================================================================== Reshape::Reshape() +: m_CTUFlag (false) +, m_recReshaped (false) +, m_reshape (true) +, m_chromaScale (1 << CSCALE_FP_PREC) +, m_vpduX (-1) +, m_vpduY (-1) { - m_CTUFlag = false; - m_recReshaped = false; - m_reshape = true; } Reshape::~Reshape() @@ -67,6 +71,12 @@ void Reshape::createDec(int bitDepth) m_invLUT.resize(m_reshapeLUTSize, 0); if (m_binCW.empty()) m_binCW.resize(PIC_CODE_CW_BINS, 0); + if (m_inputPivot.empty()) + m_inputPivot.resize(PIC_CODE_CW_BINS + 1, 0); + if (m_fwdScaleCoef.empty()) + m_fwdScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC); + if (m_invScaleCoef.empty()) + m_invScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC); if (m_reshapePivot.empty()) m_reshapePivot.resize(PIC_CODE_CW_BINS + 1, 0); if (m_chromaAdjHelpLUT.empty()) @@ -77,46 +87,6 @@ void Reshape::destroy() { } -/** --Perform inverse of a one dimension LUT -\param InputLUT describing the input LUT -\retval OutputLUT describing the inversed LUT of InputLUT -\param lut_size size of LUT in number of samples -*/ -void Reshape::reverseLUT(std::vector<Pel>& inputLUT, std::vector<Pel>& outputLUT, uint16_t lutSize) -{ 
- int i, j; - outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]] = m_sliceReshapeInfo.reshaperModelMinBinIdx*m_initCW; - for (i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++) - { - int16_t X1 = m_reshapePivot[i]; - int16_t X2 = m_reshapePivot[i + 1]; - outputLUT[X2] = (i + 1)*m_initCW; - int16_t Y1 = outputLUT[X1]; - int16_t Y2 = outputLUT[X2]; - - if (X2 !=X1) - { - int32_t scale = (int32_t)(Y2 - Y1) * (1 << FP_PREC) / (int32_t)(X2 - X1); - for (j = X1 + 1; j < X2; j++) - { - outputLUT[j] = (Pel)((scale*(int32_t)(j - X1) + (1 << (FP_PREC - 1))) >> FP_PREC) + Y1; - } - } - } - - for (i = 0; i < m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]; i++) - outputLUT[i] = outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]]; - for (i = m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1]; i < m_reshapeLUTSize; i++) - outputLUT[i] = outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1]]; - - bool clipRange = ((m_sliceReshapeInfo.reshaperModelMinBinIdx > 0) && (m_sliceReshapeInfo.reshaperModelMaxBinIdx < (PIC_CODE_CW_BINS - 1))); - for (i = 0; i < lutSize; i++) - { - if (clipRange) outputLUT[i] = Clip3((Pel)(16<<(m_lumaBD-8)), (Pel)(235<<(m_lumaBD-8)), outputLUT[i]); - else outputLUT[i] = Clip3((Pel)0, (Pel)((1<<m_lumaBD)-1), outputLUT[i]); - } -} /** compute chroma residuce scale for TU @@ -130,7 +100,103 @@ int Reshape::calculateChromaAdj(Pel avgLuma) return(iAdj); } +/** compute chroma residuce scale for TU +* \param average luma pred of TU +* \return chroma residue scale +*/ +int Reshape::calculateChromaAdjVpduNei(TransformUnit &tu, const CompArea &areaY) +{ + CodingStructure &cs = *tu.cs; + int xPos = areaY.lumaPos().x; + int yPos = areaY.lumaPos().y; + int ctuSize = cs.sps->getCTUSize(); + int numNeighbor = std::min(64, ctuSize); + int numNeighborLog = floorLog2(numNeighbor); + if (ctuSize == 128) + { + xPos = xPos / 64 * 64; + yPos = yPos / 64 * 
64; + } + else + { + xPos = xPos / ctuSize * ctuSize; + yPos = yPos / ctuSize * ctuSize; + } + + if (isVPDUprocessed(xPos, yPos) && !cs.pcv->isEncoder) + { + return getChromaScale(); + } + else + { + setVPDULoc(xPos, yPos); + Position topLeft(xPos, yPos); + CodingUnit *topLeftLuma; + const CodingUnit *cuAbove, *cuLeft; + if (CS::isDualITree(cs) && cs.slice->getSliceType() == I_SLICE) + { + topLeftLuma = tu.cs->picture->cs->getCU(topLeft, CHANNEL_TYPE_LUMA); + cuAbove = cs.picture->cs->getCURestricted(topLeftLuma->lumaPos().offset(0, -1), *topLeftLuma, CHANNEL_TYPE_LUMA); + cuLeft = cs.picture->cs->getCURestricted(topLeftLuma->lumaPos().offset(-1, 0), *topLeftLuma, CHANNEL_TYPE_LUMA); + } + else + { + topLeftLuma = cs.getCU(topLeft, CHANNEL_TYPE_LUMA); + cuAbove = cs.getCURestricted(topLeftLuma->lumaPos().offset(0, -1), *topLeftLuma, CHANNEL_TYPE_LUMA); + cuLeft = cs.getCURestricted(topLeftLuma->lumaPos().offset(-1, 0), *topLeftLuma, CHANNEL_TYPE_LUMA); + } + + xPos = topLeftLuma->lumaPos().x; + yPos = topLeftLuma->lumaPos().y; + + CompArea lumaArea = CompArea(COMPONENT_Y, tu.chromaFormat, topLeftLuma->lumaPos(), topLeftLuma->lumaSize(), true); + PelBuf piRecoY = cs.picture->getRecoBuf(lumaArea); + int strideY = piRecoY.stride; + int chromaScale = (1 << CSCALE_FP_PREC); + int lumaValue = -1; + Pel* recSrc0 = piRecoY.bufAt(0, 0); + const uint32_t picH = tu.cs->picture->lheight(); + const uint32_t picW = tu.cs->picture->lwidth(); + const Pel valueDC = 1 << (tu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1); + int32_t recLuma = 0; + int pelnum = 0; + if (cuLeft != nullptr) + { + for (int i = 0; i < numNeighbor; i++) + { + int k = (yPos + i) >= picH ? (picH - yPos - 1) : i; + recLuma += recSrc0[-1 + k * strideY]; + pelnum++; + } + } + if (cuAbove != nullptr) + { + for (int i = 0; i < numNeighbor; i++) + { + int k = (xPos + i) >= picW ? 
(picW - xPos - 1) : i; + recLuma += recSrc0[-strideY + k]; + pelnum++; + } + } + if (pelnum == numNeighbor) + { + lumaValue = ClipPel((recLuma + (1 << (numNeighborLog - 1))) >> numNeighborLog, tu.cs->slice->clpRng(COMPONENT_Y)); + } + else if (pelnum == (numNeighbor << 1)) + { + lumaValue = ClipPel((recLuma + (1 << numNeighborLog)) >> (numNeighborLog + 1), tu.cs->slice->clpRng(COMPONENT_Y)); + } + else + { + CHECK(pelnum != 0, ""); + lumaValue = ClipPel(valueDC, tu.cs->slice->clpRng(COMPONENT_Y)); + } + chromaScale = calculateChromaAdj(lumaValue); + setChromaScale(chromaScale); + return(chromaScale); + } +} /** find inx of PWL for inverse mapping * \param average luma pred of TU * \return idx of PWL for inverse mapping @@ -138,18 +204,11 @@ int Reshape::calculateChromaAdj(Pel avgLuma) int Reshape::getPWLIdxInv(int lumaVal) { int idxS = 0; - if (lumaVal < m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx + 1]) - return m_sliceReshapeInfo.reshaperModelMinBinIdx; - else if (lumaVal >= m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx]) - return m_sliceReshapeInfo.reshaperModelMaxBinIdx; - else + for (idxS = m_sliceReshapeInfo.reshaperModelMinBinIdx; (idxS <= m_sliceReshapeInfo.reshaperModelMaxBinIdx); idxS++) { - for (idxS = m_sliceReshapeInfo.reshaperModelMinBinIdx; (idxS < m_sliceReshapeInfo.reshaperModelMaxBinIdx); idxS++) - { - if (lumaVal < m_reshapePivot[idxS + 1]) break; - } - return idxS; + if (lumaVal < m_reshapePivot[idxS + 1]) break; } + return std::min(idxS, PIC_CODE_CW_BINS-1); } /** @@ -166,6 +225,7 @@ void Reshape::copySliceReshaperInfo(SliceReshapeInfo& tInfo, SliceReshapeInfo& s tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx; memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS)); tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW; + tInfo.chrResScalingOffset = sInfo.chrResScalingOffset; } tInfo.sliceReshaperEnableFlag = sInfo.sliceReshaperEnableFlag; if 
(sInfo.sliceReshaperEnableFlag) @@ -193,41 +253,32 @@ void Reshape::constructReshaper() for (int i = 0; i < pwlFwdLUTsize; i++) { m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i]; - int16_t Y1 = m_reshapePivot[i]; - int16_t Y2 = m_reshapePivot[i + 1]; - - m_fwdLUT[i*pwlFwdBinLen] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1); - - int log2PwlFwdBinLen = floorLog2(pwlFwdBinLen); - - int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2PwlFwdBinLen - 1))) >> (log2PwlFwdBinLen); - for (int j = 1; j < pwlFwdBinLen; j++) + m_inputPivot[i + 1] = m_inputPivot[i] + m_initCW; + m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (floorLog2(pwlFwdBinLen) - 1))) >> floorLog2(pwlFwdBinLen); + if (m_binCW[i] == 0) { - int tempVal = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC); - m_fwdLUT[i*pwlFwdBinLen + j] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)tempVal); + m_invScaleCoef[i] = 0; + m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC; + } + else + { + m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]); + m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / ( m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset ) ); } } - reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize); - updateChromaScaleLUT(); -} - -/** generate chroma residue scaling LUT -* \param void -* \return void -*/ -void Reshape::updateChromaScaleLUT() -{ - const int16_t CW_bin_SC_LUT[2 * PIC_ANALYZE_CW_BINS] = { 16384, 16384, 16384, 16384, 16384, 16384, 16384, 8192, 8192, 8192, 8192, 5461, 5461, 5461, 5461, 4096, 4096, 4096, 4096, 3277, 3277, 3277, 3277, 2731, 2731, 2731, 2731, 2341, 2341, 2341, 2048, 2048, 2048, 1820, 1820, 1820, 1638, 1638, 1638, 1638, 1489, 1489, 1489, 1489, 1365, 1365, 1365, 1365, 1260, 1260, 1260, 1260, 1170, 1170, 1170, 1170, 1092, 1092, 1092, 1092, 1024, 1024, 1024, 1024 }; //p=11 - for (int i = 0; i < PIC_CODE_CW_BINS; i++) + for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++) { - 
uint16_t binCW = m_lumaBD > 10 ? (m_binCW[i] >> (m_lumaBD - 10)) : m_lumaBD < 10 ? (m_binCW[i] << (10 -m_lumaBD)): m_binCW[i]; - if ((i < m_sliceReshapeInfo.reshaperModelMinBinIdx) || (i > m_sliceReshapeInfo.reshaperModelMaxBinIdx)) - m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC; - else - m_chromaAdjHelpLUT[i] = CW_bin_SC_LUT[Clip3((uint16_t)1, (uint16_t)64, (uint16_t)(binCW >> 1)) - 1]; + int idxY = lumaSample / m_initCW; + int tempVal = m_reshapePivot[idxY] + ((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal)); + + int idxYInv = getPWLIdxInv(lumaSample); + int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample)); } } + // //! \} diff --git a/source/Lib/CommonLib/Reshape.h b/source/Lib/CommonLib/Reshape.h index 5b41c402df91324a1e594f2af792c2c9770321ac..6b6e9d58e17adcafa6551c32a41e15f896cfc0ac 100644 --- a/source/Lib/CommonLib/Reshape.h +++ b/source/Lib/CommonLib/Reshape.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -64,8 +64,14 @@ protected: uint16_t m_initCW; bool m_reshape; std::vector<Pel> m_reshapePivot; + std::vector<Pel> m_inputPivot; + std::vector<int32_t> m_fwdScaleCoef; + std::vector<int32_t> m_invScaleCoef; int m_lumaBD; int m_reshapeLUTSize; + int m_chromaScale; + int m_vpduX; + int m_vpduY; public: Reshape(); #if ENABLE_SPLIT_PARALLELISM @@ -77,7 +83,6 @@ public: void createDec(int bitDepth); void destroy(); - void reverseLUT(std::vector<Pel>& inputLUT, std::vector<Pel>& outputLUT, uint16_t lutSize); std::vector<Pel>& getFwdLUT() { return m_fwdLUT; } std::vector<Pel>& getInvLUT() { return m_invLUT; } std::vector<int>& getChromaAdjHelpLUT() { return m_chromaAdjHelpLUT; } @@ -93,9 +98,13 @@ public: void copySliceReshaperInfo(SliceReshapeInfo& tInfo, SliceReshapeInfo& sInfo); void constructReshaper(); - void updateChromaScaleLUT(); bool getReshapeFlag() { return m_reshape; } void setReshapeFlag(bool b) { m_reshape = b; } + int calculateChromaAdjVpduNei(TransformUnit &tu, const CompArea &areaY); + void setVPDULoc(int x, int y) { m_vpduX = x, m_vpduY = y; } + bool isVPDUprocessed(int x, int y) { return ((x == m_vpduX) && (y == m_vpduY)); } + void setChromaScale (int chromaScale) { m_chromaScale = chromaScale; } + int getChromaScale() { return m_chromaScale; } };// END CLASS DEFINITION Reshape //! \} diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 4e2a4f03376b5894b135f7ae816f5695597635e4..5bc2f227a26e924598f0185c9169e8450619e685 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,7 +53,6 @@ CDTrace *g_trace_ctx = NULL; #endif bool g_mctsDecCheckEnabled = false; - //! \ingroup CommonLib //! \{ @@ -63,58 +62,29 @@ const char* nalUnitTypeToString(NalUnitType type) { switch (type) { -#if JVET_M0101_HLS case NAL_UNIT_CODED_SLICE_TRAIL: return "TRAIL"; case NAL_UNIT_CODED_SLICE_STSA: return "STSA"; - case NAL_UNIT_CODED_SLICE_IDR_W_RADL: return "IDR_W_RADL"; - case NAL_UNIT_CODED_SLICE_IDR_N_LP: return "IDR_N_LP"; - case NAL_UNIT_CODED_SLICE_CRA: return "CRA"; case NAL_UNIT_CODED_SLICE_RADL: return "RADL"; case NAL_UNIT_CODED_SLICE_RASL: return "RASL"; -#if HEVC_VPS - case NAL_UNIT_VPS: return "VPS"; -#endif - case NAL_UNIT_SPS: return "SPS"; - case NAL_UNIT_PPS: return "PPS"; - case NAL_UNIT_APS: return "APS"; - case NAL_UNIT_ACCESS_UNIT_DELIMITER: return "AUD"; - case NAL_UNIT_EOS: return "EOS"; - case NAL_UNIT_EOB: return "EOB"; - case NAL_UNIT_FILLER_DATA: return "FILLER"; - case NAL_UNIT_PREFIX_SEI: return "Prefix SEI"; - case NAL_UNIT_SUFFIX_SEI: return "Suffix SEI"; - default: return "UNK"; -#else - case NAL_UNIT_CODED_SLICE_TRAIL_R: return "TRAIL_R"; - case NAL_UNIT_CODED_SLICE_TRAIL_N: return "TRAIL_N"; - case NAL_UNIT_CODED_SLICE_TSA_R: return "TSA_R"; - case NAL_UNIT_CODED_SLICE_TSA_N: return "TSA_N"; - case NAL_UNIT_CODED_SLICE_STSA_R: return "STSA_R"; - case NAL_UNIT_CODED_SLICE_STSA_N: return "STSA_N"; - case NAL_UNIT_CODED_SLICE_BLA_W_LP: return "BLA_W_LP"; - case NAL_UNIT_CODED_SLICE_BLA_W_RADL: return "BLA_W_RADL"; - case NAL_UNIT_CODED_SLICE_BLA_N_LP: return "BLA_N_LP"; case NAL_UNIT_CODED_SLICE_IDR_W_RADL: return "IDR_W_RADL"; case NAL_UNIT_CODED_SLICE_IDR_N_LP: return "IDR_N_LP"; case NAL_UNIT_CODED_SLICE_CRA: return "CRA"; - case NAL_UNIT_CODED_SLICE_RADL_R: return "RADL_R"; - case NAL_UNIT_CODED_SLICE_RADL_N: return "RADL_N"; - case NAL_UNIT_CODED_SLICE_RASL_R: return "RASL_R"; - case NAL_UNIT_CODED_SLICE_RASL_N: return "RASL_N"; -#if HEVC_VPS + 
case NAL_UNIT_CODED_SLICE_GDR: return "GDR"; + case NAL_UNIT_DPS: return "DPS"; case NAL_UNIT_VPS: return "VPS"; -#endif case NAL_UNIT_SPS: return "SPS"; case NAL_UNIT_PPS: return "PPS"; - case NAL_UNIT_APS: return "APS"; + case NAL_UNIT_PREFIX_APS: return "Prefix APS"; + case NAL_UNIT_SUFFIX_APS: return "Suffix APS"; + case NAL_UNIT_PH: return "PH"; case NAL_UNIT_ACCESS_UNIT_DELIMITER: return "AUD"; case NAL_UNIT_EOS: return "EOS"; case NAL_UNIT_EOB: return "EOB"; - case NAL_UNIT_FILLER_DATA: return "FILLER"; case NAL_UNIT_PREFIX_SEI: return "Prefix SEI"; case NAL_UNIT_SUFFIX_SEI: return "Suffix SEI"; + case NAL_UNIT_FD: return "FD"; default: return "UNK"; -#endif + } } @@ -163,37 +133,47 @@ public: } break; -#if HEVC_USE_MDCS - //------------------------------------------------ - case SCAN_HOR: - - if (m_column == m_blockWidth - 1) + case SCAN_TRAV_HOR: + if (m_line % 2 == 0) { - m_line++; - m_column = 0; + if (m_column == (m_blockWidth - 1)) + { + m_line++; + m_column = m_blockWidth - 1; + } + else m_column++; } else { - m_column++; + if (m_column == 0) + { + m_line++; + m_column = 0; + } + else m_column--; } break; - //------------------------------------------------ - - case SCAN_VER: - - if (m_line == m_blockHeight - 1) + case SCAN_TRAV_VER: + if (m_column % 2 == 0) { - m_column++; - m_line = 0; + if (m_line == (m_blockHeight - 1)) + { + m_column++; + m_line = m_blockHeight - 1; + } + else m_line++; } else { - m_line++; + if (m_line == 0) + { + m_column++; + m_line = 0; + } + else m_line--; } break; - -#endif //------------------------------------------------ default: @@ -205,50 +185,50 @@ public: return rtn; } }; -const int8_t g_GbiLog2WeightBase = 3; -const int8_t g_GbiWeightBase = (1 << g_GbiLog2WeightBase); -const int8_t g_GbiWeights[GBI_NUM] = { -2, 3, 4, 5, 10 }; -const int8_t g_GbiSearchOrder[GBI_NUM] = { GBI_DEFAULT, GBI_DEFAULT - 2, GBI_DEFAULT + 2, GBI_DEFAULT - 1, GBI_DEFAULT + 1 }; -int8_t g_GbiCodingOrder[GBI_NUM]; -int8_t 
g_GbiParsingOrder[GBI_NUM]; - -int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList) +const int8_t g_BcwLog2WeightBase = 3; +const int8_t g_BcwWeightBase = (1 << g_BcwLog2WeightBase); +const int8_t g_BcwWeights[BCW_NUM] = { -2, 3, 4, 5, 10 }; +const int8_t g_BcwSearchOrder[BCW_NUM] = { BCW_DEFAULT, BCW_DEFAULT - 2, BCW_DEFAULT + 2, BCW_DEFAULT - 1, BCW_DEFAULT + 1 }; +int8_t g_BcwCodingOrder[BCW_NUM]; +int8_t g_BcwParsingOrder[BCW_NUM]; + +int8_t getBcwWeight(uint8_t bcwIdx, uint8_t uhRefFrmList) { // Weghts for the model: P0 + w * (P1 - P0) = (1-w) * P0 + w * P1 // Retuning 1-w for P0 or w for P1 - return (uhRefFrmList == REF_PIC_LIST_0 ? g_GbiWeightBase - g_GbiWeights[gbiIdx] : g_GbiWeights[gbiIdx]); + return (uhRefFrmList == REF_PIC_LIST_0 ? g_BcwWeightBase - g_BcwWeights[bcwIdx] : g_BcwWeights[bcwIdx]); } -void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs) +void resetBcwCodingOrder(bool bRunDecoding, const CodingStructure &cs) { - // Form parsing order: { GBI_DEFAULT, GBI_DEFAULT+1, GBI_DEFAULT-1, GBI_DEFAULT+2, GBI_DEFAULT-2, ... } - g_GbiParsingOrder[0] = GBI_DEFAULT; - for (int i = 1; i <= (GBI_NUM >> 1); ++i) + // Form parsing order: { BCW_DEFAULT, BCW_DEFAULT+1, BCW_DEFAULT-1, BCW_DEFAULT+2, BCW_DEFAULT-2, ... 
} + g_BcwParsingOrder[0] = BCW_DEFAULT; + for (int i = 1; i <= (BCW_NUM >> 1); ++i) { - g_GbiParsingOrder[2 * i - 1] = GBI_DEFAULT + (int8_t)i; - g_GbiParsingOrder[2 * i] = GBI_DEFAULT - (int8_t)i; + g_BcwParsingOrder[2 * i - 1] = BCW_DEFAULT + (int8_t)i; + g_BcwParsingOrder[2 * i] = BCW_DEFAULT - (int8_t)i; } // Form encoding order if (!bRunDecoding) { - for (int i = 0; i < GBI_NUM; ++i) + for (int i = 0; i < BCW_NUM; ++i) { - g_GbiCodingOrder[(uint32_t)g_GbiParsingOrder[i]] = i; + g_BcwCodingOrder[(uint32_t)g_BcwParsingOrder[i]] = i; } } } -uint32_t deriveWeightIdxBits(uint8_t gbiIdx) // Note: align this with TEncSbac::codeGbiIdx and TDecSbac::parseGbiIdx +uint32_t deriveWeightIdxBits(uint8_t bcwIdx) // Note: align this with TEncSbac::codeBcwIdx and TDecSbac::parseBcwIdx { uint32_t numBits = 1; - uint8_t gbiCodingIdx = (uint8_t)g_GbiCodingOrder[gbiIdx]; + uint8_t bcwCodingIdx = (uint8_t)g_BcwCodingOrder[bcwIdx]; - if (GBI_NUM > 2 && gbiCodingIdx != 0) + if (BCW_NUM > 2 && bcwCodingIdx != 0) { - uint32_t prefixNumBits = GBI_NUM - 2; + uint32_t prefixNumBits = BCW_NUM - 2; uint32_t step = 1; - uint8_t prefixSymbol = gbiCodingIdx; + uint8_t prefixSymbol = bcwCodingIdx; // Truncated unary code uint8_t idx = 1; @@ -269,55 +249,21 @@ uint32_t deriveWeightIdxBits(uint8_t gbiIdx) // Note: align this with TEncSbac:: return numBits; } -uint32_t g_log2SbbSize[2][MAX_CU_DEPTH+1][MAX_CU_DEPTH+1][2] = +uint32_t g_log2SbbSize[MAX_CU_DEPTH + 1][MAX_CU_DEPTH + 1][2] = +//===== luma/chroma ===== { - //===== luma ===== - { - { {0,0}, {0,1}, {0,2}, {0,3}, {0,4}, {0,4}, {0,4}, {0,4} }, - { {1,0}, {1,1}, {1,2}, {1,3}, {1,3}, {1,3}, {1,3}, {1,3} }, - { {2,0}, {2,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {3,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {4,0}, {3,1}, {2,2}, {2,2}, 
{2,2}, {2,2}, {2,2}, {2,2} } - }, - //===== chroma ===== - { - { {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} }, - { {0,0}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }, - { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} } - }, + { { 0,0 },{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,4 },{ 0,4 },{ 0,4 } }, + { { 1,0 },{ 1,1 },{ 1,1 },{ 1,3 },{ 1,3 },{ 1,3 },{ 1,3 },{ 1,3 } }, + { { 2,0 },{ 1,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }, + { { 3,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }, + { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }, + { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }, + { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }, + { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } } }; // initialize ROM variables void initROM() { - int c; - - // g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ... - // g_aucLog2[ x ]: log2(x), if x=1 -> 0, x=2 -> 1, x=4 -> 2, x=8 -> 3, x=16 -> 4, ... - ::memset(g_aucLog2, 0, sizeof(g_aucLog2)); - c = 0; - for( int i = 0, n = 0; i <= MAX_CU_SIZE; i++ ) - { - g_aucNextLog2[i] = i <= 1 ? 
0 : c + 1; - - if( i == ( 1 << n ) ) - { - c = n; - n++; - } - - g_aucPrevLog2[i] = c; - g_aucLog2 [i] = c; - } - - gp_sizeIdxInfo = new SizeIndexInfoLog2(); gp_sizeIdxInfo->init(MAX_CU_SIZE); @@ -325,8 +271,6 @@ void initROM() SizeIndexInfoLog2 sizeInfo; sizeInfo.init(MAX_CU_SIZE); - for( int ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ ) - { // initialize scan orders for (uint32_t blockHeightIdx = 0; blockHeightIdx < sizeInfo.numAllHeights(); blockHeightIdx++) { @@ -350,7 +294,7 @@ void initROM() scan = new ScanElement[totalValues]; } - g_scanOrder[ch][SCAN_UNGROUPED][scanType][blockWidthIdx][blockHeightIdx] = scan; + g_scanOrder[SCAN_UNGROUPED][scanType][blockWidthIdx][blockHeightIdx] = scan; if (scan == nullptr) { @@ -374,7 +318,7 @@ void initROM() //-------------------------------------------------------------------------------------------------- //grouped scan orders - const uint32_t* log2Sbb = g_log2SbbSize[ch][ g_aucLog2[blockWidth] ][ g_aucLog2[blockHeight] ]; + const uint32_t* log2Sbb = g_log2SbbSize[floorLog2(blockWidth)][floorLog2(blockHeight)]; const uint32_t log2CGWidth = log2Sbb[0]; const uint32_t log2CGHeight = log2Sbb[1]; @@ -392,7 +336,7 @@ void initROM() ScanElement *scan = new ScanElement[totalValues]; - g_scanOrder[ch][SCAN_GROUPED_4x4][scanType][blockWidthIdx][blockHeightIdx] = scan; + g_scanOrder[SCAN_GROUPED_4x4][scanType][blockWidthIdx][blockHeightIdx] = scan; if ( blockWidth > JVET_C0024_ZERO_OUT_TH || blockHeight > JVET_C0024_ZERO_OUT_TH ) { @@ -434,6 +378,29 @@ void initROM() //-------------------------------------------------------------------------------------------------- } } + + // initialize CoefTopLeftDiagScan8x8 for LFNST + for( uint32_t blockWidthIdx = 0; blockWidthIdx < sizeInfo.numAllWidths(); blockWidthIdx++ ) + { + const uint32_t blockWidth = sizeInfo.sizeFrom( blockWidthIdx ); + + const static uint8_t g_auiXYDiagScan8x8[ 64 ][ 2 ] = + { + { 0, 0 }, { 0, 1 }, { 1, 0 }, { 0, 2 }, { 1, 1 }, { 2, 0 }, { 0, 3 }, { 1, 2 }, + { 2, 1 }, 
{ 3, 0 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 2, 3 }, { 3, 2 }, { 3, 3 }, + { 0, 4 }, { 0, 5 }, { 1, 4 }, { 0, 6 }, { 1, 5 }, { 2, 4 }, { 0, 7 }, { 1, 6 }, + { 2, 5 }, { 3, 4 }, { 1, 7 }, { 2, 6 }, { 3, 5 }, { 2, 7 }, { 3, 6 }, { 3, 7 }, + { 4, 0 }, { 4, 1 }, { 5, 0 }, { 4, 2 }, { 5, 1 }, { 6, 0 }, { 4, 3 }, { 5, 2 }, + { 6, 1 }, { 7, 0 }, { 5, 3 }, { 6, 2 }, { 7, 1 }, { 6, 3 }, { 7, 2 }, { 7, 3 }, + { 4, 4 }, { 4, 5 }, { 5, 4 }, { 4, 6 }, { 5, 5 }, { 6, 4 }, { 4, 7 }, { 5, 6 }, + { 6, 5 }, { 7, 4 }, { 5, 7 }, { 6, 6 }, { 7, 5 }, { 6, 7 }, { 7, 6 }, { 7, 7 } + }; + for( int i = 0; i < 64; i++ ) + { + g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].idx = g_auiXYDiagScan8x8[ i ][ 0 ] + g_auiXYDiagScan8x8[ i ][ 1 ] * blockWidth; + g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].x = g_auiXYDiagScan8x8[ i ][ 0 ]; + g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].y = g_auiXYDiagScan8x8[ i ][ 1 ]; + } } for( int idxH = MAX_CU_DEPTH - MIN_CU_LOG2; idxH >= 0; --idxH ) @@ -457,6 +424,30 @@ void initROM() } } } + + for (int idxH = 0; idxH < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxH) + { + for (int idxW = 0; idxW < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxW) + { + const int nCbH = 1 << (idxH + 1); + const int nCbW = 1 << (idxW + 1); + const int nCbR = (nCbW > nCbH) ? nCbW / nCbH : nCbH / nCbW; + + // let SIMD can read at least 64-bit when at last row + g_triangleWeights[0][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + g_triangleWeights[1][idxH][idxW] = new int16_t[nCbH * nCbW + 4]; + for (int y = 0; y < nCbH; y++) + { + for (int x = 0; x < nCbW; x++) + { + g_triangleWeights[0][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, x / nCbR - y + 4) : Clip3(0, 8, x - y / nCbR + 4); + g_triangleWeights[1][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? 
Clip3(0, 8, nCbH - 1 - x / nCbR - y + 4) : Clip3(0, 8, nCbW - 1 - x - y / nCbR + 4); + } + } + } + } + + ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled)); } void destroyROM() @@ -464,19 +455,16 @@ void destroyROM() unsigned numWidths = gp_sizeIdxInfo->numAllWidths(); unsigned numHeights = gp_sizeIdxInfo->numAllHeights(); - for( uint32_t ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ ) + for (uint32_t groupTypeIndex = 0; groupTypeIndex < SCAN_NUMBER_OF_GROUP_TYPES; groupTypeIndex++) { - for( uint32_t groupTypeIndex = 0; groupTypeIndex < SCAN_NUMBER_OF_GROUP_TYPES; groupTypeIndex++ ) + for (uint32_t scanOrderIndex = 0; scanOrderIndex < SCAN_NUMBER_OF_TYPES; scanOrderIndex++) { - for( uint32_t scanOrderIndex = 0; scanOrderIndex < SCAN_NUMBER_OF_TYPES; scanOrderIndex++ ) + for (uint32_t blockWidthIdx = 0; blockWidthIdx <= numWidths; blockWidthIdx++) { - for( uint32_t blockWidthIdx = 0; blockWidthIdx <= numWidths; blockWidthIdx++ ) + for (uint32_t blockHeightIdx = 0; blockHeightIdx <= numHeights; blockHeightIdx++) { - for( uint32_t blockHeightIdx = 0; blockHeightIdx <= numHeights; blockHeightIdx++ ) - { - delete[] g_scanOrder[ch][groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx]; - g_scanOrder[ch][groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx] = nullptr; - } + delete[] g_scanOrder[groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx]; + g_scanOrder[groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx] = nullptr; } } } @@ -484,20 +472,33 @@ void destroyROM() delete gp_sizeIdxInfo; gp_sizeIdxInfo = nullptr; + + for (int idxH = 0; idxH < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxH) + { + for (int idxW = 0; idxW < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxW) + { + delete[] g_triangleWeights[0][idxH][idxW]; + delete[] g_triangleWeights[1][idxH][idxW]; + g_triangleWeights[0][idxH][idxW] = nullptr; + g_triangleWeights[1][idxH][idxW] = nullptr; + } + } } // 
==================================================================================================================== // Data structure related table & variable // ==================================================================================================================== -const int g_quantScales[SCALING_LIST_REM_NUM] = +const int g_quantScales[2][SCALING_LIST_REM_NUM] = // can be represented as a 9 element table { - 26214,23302,20560,18396,16384,14564 + { 26214,23302,20560,18396,16384,14564 }, + { 18396,16384,14564,13107,11651,10280 } // Note: last 3 values of second row == half of the first 3 values of the first row }; -const int g_invQuantScales[SCALING_LIST_REM_NUM] = +const int g_invQuantScales[2][SCALING_LIST_REM_NUM] = // can be represented as a 9 element table { - 40,45,51,57,64,72 + { 40,45,51,57,64,72 }, + { 57,64,72,80,90,102 } // Note: last 3 values of second row == double of the first 3 values of the first row }; //-------------------------------------------------------------------------------------------------- @@ -505,16 +506,6 @@ const int g_invQuantScales[SCALING_LIST_REM_NUM] = //-------------------------------------------------------------------------------------------------- //coefficients //-------------------------------------------------------------------------------------------------- - -const uint8_t g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize] = -{ - //0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69 - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 
9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,29,30,31,32,33,33,34,34,35,35,36,36,37,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,63,63,63,63,63,63 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,63,63,63,63,63,63 } -}; - // ==================================================================================================================== // Intra prediction // ==================================================================================================================== @@ -551,10 +542,9 @@ const uint8_t g_aucIntraModeNumFast_NotUseMPM[MAX_CU_DEPTH] = }; const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] = -// H D V -//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, DM -{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 4, 6, 8, 10, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 44, 44, 45, 46, 46, 46, 47, 48, 48, 48, 49, 50, 51, 52, 52, 52, 53, 54, 54, 54, 55, 56, 56, 56, 57, 58, 59, 60, DM_CHROMA_IDX }; - +// * H * D * * * * * * * * V * * * * * * * * +//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, DM +{ 0, 1, 61, 62, 63, 64, 65, 66, 2, 3, 5, 6, 8, 10, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 30, 31, 
33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 42, 43, 43, 44, 44, 45, 45, 46, 47, 48, 48, 49, 49, 50, 51, 51, 52, 52, 53, 54, 55, 55, 56, 56, 57, 57, 58, 59, 59, 60, DM_CHROMA_IDX }; @@ -562,9 +552,8 @@ const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] = // Misc. // ==================================================================================================================== SizeIndexInfo* gp_sizeIdxInfo = NULL; -int8_t g_aucLog2 [MAX_CU_SIZE + 1]; -int8_t g_aucNextLog2[MAX_CU_SIZE + 1]; -int8_t g_aucPrevLog2[MAX_CU_SIZE + 1]; + +const int g_ictModes[2][4] = { { 0, 3, 1, 2 }, { 0, -3, -1, -2 } }; UnitScale g_miScaling( MIN_CU_LOG2, MIN_CU_LOG2 ); @@ -574,7 +563,8 @@ UnitScale g_miScaling( MIN_CU_LOG2, MIN_CU_LOG2 ); // ==================================================================================================================== // scanning order table -ScanElement *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; +ScanElement *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; +ScanElement g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ]; const uint32_t g_uiMinInGroup[LAST_SIGNIFICANT_GROUPS] = { 0,1,2,3,4,6,8,12,16,24,32,48,64,96 }; const uint32_t g_uiGroupIdx[MAX_TB_SIZEY] = { 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11 }; @@ -582,16 +572,16 @@ const uint32_t g_auiGoRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; -const uint32_t g_auiGoRicePosCoeff0[3][32] = -{ - {0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8}, - {1, 1, 1, 1, 2, 3, 4, 4, 4, 6, 6, 6, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16}, - {1, 1, 2, 2, 2, 3, 4, 4, 4, 6, 6, 6, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 
12, 12, 12, 16, 16, 16, 16, 16, 16, 16} -}; - -#if HEVC_USE_SCALING_LISTS const char *MatrixType[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] = { + { + "INTRA1X1_LUMA", + "INTRA1X1_CHROMAU", + "INTRA1X1_CHROMAV", + "INTER1X1_LUMA", + "INTER1X1_CHROMAU", + "INTER1X1_CHROMAV" + }, { "INTRA2X2_LUMA", "INTRA2X2_CHROMAU", @@ -625,17 +615,29 @@ const char *MatrixType[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] = "INTER16X16_CHROMAV" }, { - "INTRA32X32_LUMA", - "INTRA32X32_CHROMAU_FROM16x16_CHROMAU", - "INTRA32X32_CHROMAV_FROM16x16_CHROMAV", - "INTER32X32_LUMA", - "INTER32X32_CHROMAU_FROM16x16_CHROMAU", - "INTER32X32_CHROMAV_FROM16x16_CHROMAV" + "INTRA32X32_LUMA", + "INTRA32X32_CHROMAU", + "INTRA32X32_CHROMAV", + "INTER32X32_LUMA", + "INTER32X32_CHROMAU", + "INTER32X32_CHROMAV" + }, + { + "INTRA64X64_LUMA", + "INTRA64X64_CHROMAU", + "INTRA64X64_CHROMAV", + "INTER64X64_LUMA", + "INTER64X64_CHROMAU", + "INTER64X64_CHROMAV" + }, + { }, }; const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] = { + { //1x1 + }, { }, { @@ -652,11 +654,21 @@ const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] = }, { "INTRA32X32_LUMA_DC", - "INTRA32X32_CHROMAU_DC_FROM16x16_CHROMAU", - "INTRA32X32_CHROMAV_DC_FROM16x16_CHROMAV", + "INTRA32X32_CHROMAU_DC", + "INTRA32X32_CHROMAV_DC", "INTER32X32_LUMA_DC", - "INTER32X32_CHROMAU_DC_FROM16x16_CHROMAU", - "INTER32X32_CHROMAV_DC_FROM16x16_CHROMAV" + "INTER32X32_CHROMAU_DC", + "INTER32X32_CHROMAV_DC" + }, + { + "INTRA64X64_LUMA_DC", + "INTRA64X64_CHROMAU_DC", + "INTRA64X64_CHROMAV_DC", + "INTER64X64_LUMA_DC", + "INTER64X64_CHROMAU_DC", + "INTER64X64_CHROMAV_DC" + }, + { }, }; @@ -670,33 +682,38 @@ const int g_quantTSDefault4x4[4 * 4] = const int g_quantIntraDefault8x8[8 * 8] = { - 16,16,16,16,17,18,21,24, - 16,16,16,16,17,19,22,25, - 16,16,17,18,20,22,25,29, - 16,16,18,21,24,27,31,36, - 17,17,20,24,30,35,41,47, - 18,19,22,27,35,44,54,65, - 21,22,25,31,41,54,70,88, - 24,25,29,36,47,65,88,115 + 16,16,16,16,16,16,16,16, + 
16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 }; const int g_quantInterDefault8x8[8 * 8] = { - 16,16,16,16,17,18,20,24, - 16,16,16,17,18,20,24,25, - 16,16,17,18,20,24,25,28, - 16,17,18,20,24,25,28,33, - 17,18,20,24,25,28,33,41, - 18,20,24,25,28,33,41,54, - 20,24,25,28,33,41,54,71, - 24,25,28,33,41,54,71,91 + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 }; -const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM] = { 4, 16, 64, 256, 1024, 4096, 16384 }; -const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM] = { 2, 4, 8, 16, 32, 64, 128 }; -#endif +const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM] = { 1, 4, 16, 64, 256, 1024, 4096, 16384 }; +const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM] = { 1, 2, 4, 8, 16, 32, 64, 128 }; uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2]; +int16_t *g_triangleWeights[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; +Mv g_reusedUniMVs[32][32][8][8][2][33]; +bool g_isReusedUniMVsFilled[32][32][8][8]; +const uint8_t g_paletteQuant[52] = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 23, 25, 26, 28, 29, 31, 32, 34, 36, 37, 39, 41, 42, 45 }; +uint8_t g_paletteRunTopLut [5] = { 0, 1, 1, 2, 2 }; +uint8_t g_paletteRunLeftLut[5] = { 0, 1, 2, 3, 4 }; //! 
\} diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index d896d304dda40320bfb3a39b13f4fa3991d5c397..929a75547ef2cdafdc21e965dea0b93e8d3288e9 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,6 +59,7 @@ void destroyROM(); // Data structure related table & variable // ==================================================================================================================== + // flexible conversion from relative to absolute index struct ScanElement { @@ -67,25 +68,18 @@ struct ScanElement uint16_t y; }; -extern uint32_t g_log2SbbSize [2][MAX_CU_DEPTH+1][MAX_CU_DEPTH+1][2]; +extern uint32_t g_log2SbbSize[MAX_CU_DEPTH + 1][MAX_CU_DEPTH + 1][2]; extern ScanElement - *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; + *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1]; +extern ScanElement g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ]; -extern const int g_quantScales [SCALING_LIST_REM_NUM]; // Q(QP%6) -extern const int g_invQuantScales[SCALING_LIST_REM_NUM]; // IQ(QP%6) +extern const int g_quantScales [2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM]; // Q(QP%6) +extern const int g_invQuantScales[2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM]; // IQ(QP%6) static const int g_numTransformMatrixSizes = 6; static const int g_transformMatrixShift[TRANSFORM_NUMBER_OF_DIRECTIONS] = { 6, 6 }; -// ==================================================================================================================== -// Luma QP to Chroma QP mapping -// 
==================================================================================================================== -static const int chromaQPMappingTableSize = (MAX_QP + 7); - -extern const uint8_t g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize]; - - // ==================================================================================================================== // Scanning order & context mapping table // ==================================================================================================================== @@ -93,8 +87,10 @@ extern const uint8_t g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSi extern const uint32_t g_uiGroupIdx[ MAX_TB_SIZEY ]; extern const uint32_t g_uiMinInGroup[ LAST_SIGNIFICANT_GROUPS ]; extern const uint32_t g_auiGoRiceParsCoeff [ 32 ]; -extern const uint32_t g_auiGoRicePosCoeff0[ 3 ][ 32 ]; -extern const uint32_t g_auiGoRiceRange[ MAX_GR_ORDER_RESIDUAL ]; //!< maximum value coded with Rice codes +inline uint32_t g_auiGoRicePosCoeff0(int st, uint32_t ricePar) +{ + return (st < 2 ? 1 : 2) << ricePar; +} // ==================================================================================================================== // Intra prediction table @@ -129,33 +125,36 @@ extern const TMatrixCoeff g_trCoreDST7P8 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 8][ extern const TMatrixCoeff g_trCoreDST7P16 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 16][ 16]; extern const TMatrixCoeff g_trCoreDST7P32 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 32][ 32]; +extern const int8_t g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ]; +extern const int8_t g_lfnst4x4[ 4 ][ 2 ][ 16 ][ 16 ]; + +extern const uint8_t g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ]; // ==================================================================================================================== // Misc. 
// ==================================================================================================================== extern SizeIndexInfo* gp_sizeIdxInfo; -extern int8_t g_aucLog2 [MAX_CU_SIZE + 1]; -extern int8_t g_aucNextLog2 [MAX_CU_SIZE + 1]; -extern int8_t g_aucPrevLog2 [MAX_CU_SIZE + 1]; + +extern const int g_ictModes[2][4]; inline bool is34( const SizeType& size ) { - return ( size & ( ( int64_t ) 1 << ( g_aucLog2[size] - 1 ) ) ); + return ( size & ( ( int64_t ) 1 << ( floorLog2(size) - 1 ) ) ); } inline bool is58( const SizeType& size ) { - return ( size & ( ( int64_t ) 1 << ( g_aucLog2[size] - 2 ) ) ); + return ( size & ( ( int64_t ) 1 << ( floorLog2(size) - 2 ) ) ); } inline bool isNonLog2BlockSize( const Size& size ) { - return ( ( 1 << g_aucLog2[size.width] ) != size.width ) || ( ( 1 << g_aucLog2[size.height] ) != size.height ); + return ( ( 1 << floorLog2(size.width) ) != size.width ) || ( ( 1 << floorLog2(size.height) ) != size.height ); } inline bool isNonLog2Size( const SizeType& size ) { - return ( ( 1 << g_aucLog2[size] ) != size ); + return ( ( 1 << floorLog2(size) ) != size ); } extern UnitScale g_miScaling; // scaling object for motion scaling @@ -168,7 +167,6 @@ extern CDTrace* g_trace_ctx; const char* nalUnitTypeToString(NalUnitType type); -#if HEVC_USE_SCALING_LISTS extern const char *MatrixType [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; extern const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; @@ -178,22 +176,21 @@ extern const int g_quantInterDefault8x8[8*8]; extern const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM]; extern const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM]; -#endif extern MsgLevel g_verbosity; -extern const int8_t g_GbiLog2WeightBase; -extern const int8_t g_GbiWeightBase; -extern const int8_t g_GbiWeights[GBI_NUM]; -extern const int8_t g_GbiSearchOrder[GBI_NUM]; -extern int8_t g_GbiCodingOrder[GBI_NUM]; -extern int8_t g_GbiParsingOrder[GBI_NUM]; +extern const int8_t g_BcwLog2WeightBase; 
+extern const int8_t g_BcwWeightBase; +extern const int8_t g_BcwWeights[BCW_NUM]; +extern const int8_t g_BcwSearchOrder[BCW_NUM]; +extern int8_t g_BcwCodingOrder[BCW_NUM]; +extern int8_t g_BcwParsingOrder[BCW_NUM]; class CodingStructure; -int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList); -void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs); -uint32_t deriveWeightIdxBits(uint8_t gbiIdx); +int8_t getBcwWeight(uint8_t bcwIdx, uint8_t uhRefFrmList); +void resetBcwCodingOrder(bool bRunDecoding, const CodingStructure &cs); +uint32_t deriveWeightIdxBits(uint8_t bcwIdx); constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -209,8 +206,19 @@ constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, //! \} extern uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2]; - +// 7-tap/3-tap, direction, 2/4/8/16/32/64/128 +extern int16_t *g_triangleWeights[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2]; extern bool g_mctsDecCheckEnabled; +class Mv; +extern Mv g_reusedUniMVs[32][32][8][8][2][33]; +extern bool g_isReusedUniMVsFilled[32][32][8][8]; + +extern const uint8_t g_paletteQuant[52]; +extern uint8_t g_paletteRunTopLut[5]; +extern uint8_t g_paletteRunLeftLut[5]; + +const int g_IBCBufferSize = 256 * 128; + #endif //__TCOMROM__ diff --git a/source/Lib/CommonLib/RomLFNST.cpp b/source/Lib/CommonLib/RomLFNST.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d09e56cde128697853f9772d2f5b5bf06dd89fb3 --- /dev/null +++ b/source/Lib/CommonLib/RomLFNST.cpp @@ -0,0 +1,366 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. 
This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file RomLFNST.cpp + \brief LFNST Tables +*/ + +#include "Rom.h" + + +#include <memory.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <iomanip> + +// ==================================================================================================================== +// LFNST Tables +// ==================================================================================================================== + +const uint8_t g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ] = +{//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +const int8_t g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ] = { + { //0 + { + { -117, 28, 18, 2, 4, 1, 2, 1, 32, -18, -2, 0, -1, 0, 0, 0, 14, -1, -3, 0, -1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, -1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 }, + { -29, -91, 47, 1, 9, 0, 3, 0, -54, 26, -8, 3, 0, 1, 0, 0, 33, 5, -9, -1, -2, 0, -1, 0, -3, 3, 0, 0, 0, 0, 0, 0, 7, 2, -2, 0, -1, 1, 0, 0, 2, 1, -1, 0, 0, 0, 0, 0 }, + { -10, 62, -11, -8, -2, -2, -1, -1, -95, 3, 32, 0, 4, 0, 2, 0, 32, -30, -4, 4, -1, 1, 0, 0, 6, 2, -5, 0, 0, 0, 0, 0, 6, -3, 0, 0, 2, 0, -1, 0, 2, -1, 0, 0, 1, 0, 0, 0 }, + { -15, 15, -10, -2, 1, 0, 1, 0, 10, 112, -20, -17, -4, -4, -1, -2, -20, -26, 31, 1, 0, 0, 0, 0, 2, -16, -1, 6, 0, 1, 0, 0, 1, -4, 0, 0, 0, -3, 0, 1, 0, -1, 0, 0, 0, -2, 0, 0 }, + { 32, 39, 92, -44, 4, -10, 1, -4, 26, 12, -15, 13, -5, 2, -2, 0, 29, -16, -22, 8, 0, 1, 0, 1, -20, 6, 4, -3, 1, 0, 0, 0, 1, -4, -3, 2, -4, 1, 0, 0, 1, -1, -2, 1, 
-2, 0, 0, 0 }, + { -10, 1, 50, -15, 2, -3, 1, -1, -28, -15, 14, 6, 1, 1, 1, 0, -99, -4, 9, 5, 5, 2, 2, 1, 44, -10, -11, 1, -2, 0, -1, 0, -5, 4, -3, 0, 8, -1, -2, 0, -2, 1, -1, 0, 4, 0, -1, 0 }, + { 1, -33, -11, -14, 7, -2, 2, 0, 29, -12, 37, -7, -4, 0, -1, 0, 6, -99, 3, 26, -1, 5, 0, 2, 14, 30, -27, -2, 1, -1, 0, -1, -6, 6, 6, -3, 1, 3, -3, 0, -1, 1, 1, 0, 0, 1, -1, 0 }, + { 0, 6, -6, 21, -4, 2, 0, 0, -20, -24, -104, 30, 5, 5, 1, 2, -7, -46, 10, -14, 7, 0, 1, 0, 9, 21, 7, -6, -2, -1, 0, -1, 2, 2, 5, -2, 0, 3, 4, -1, 0, 0, 1, 0, 0, 1, 2, -1 }, + { -13, -13, -37, -101, 29, -11, 8, -3, -12, -15, -20, 2, -11, 5, -2, 1, -12, 10, 26, 12, -6, 0, -1, 0, -32, -2, 11, 3, 3, -1, 1, 0, 11, -5, -1, 6, -4, 2, 1, 0, 3, -1, 1, 2, -1, 0, 0, 0 }, + { 6, 1, -14, -36, 9, -3, 2, 0, 10, 9, -18, -1, -3, 1, 0, 0, 38, 26, -13, -1, -5, -1, -1, 0, 102, 3, -14, -1, -5, -1, -2, 0, -29, 10, 10, 0, 10, -4, -1, 1, -7, 1, 2, 1, 2, -1, 0, 0 }, + { -12, -2, -26, -12, -9, 2, -1, 1, -3, 30, 4, 34, -4, 0, -1, 0, -30, 3, -92, 14, 19, 0, 3, 0, -11, 34, 21, -33, 1, -2, 0, -1, -9, -4, 18, 3, 2, 0, 0, -2, -1, -1, 3, 0, 0, 0, 0, -1 }, + { 0, -3, 0, -4, -15, 6, -3, 1, -7, -15, -28, -86, 19, -5, 4, -1, -5, -17, -41, 42, -6, 2, -1, 1, -1, -40, 37, 13, -4, 2, -1, 1, -10, 13, -1, -4, 4, -4, 3, 4, -2, 2, -1, -1, 1, -1, 1, 2 }, + { -1, 9, 13, 5, 14, -2, 2, -1, -8, 3, -4, -62, 4, 1, 1, 0, -12, 23, 16, -11, -17, 0, -1, 0, -11, 97, -3, -3, 0, -6, 0, -2, -21, -5, 23, 0, 2, -2, -1, 6, -3, -3, 1, 0, 0, 0, 0, 2 }, + { 6, 2, -3, 2, 10, -1, 2, 0, 8, 3, -1, -20, 0, 1, 0, 0, -4, 4, -16, 0, -2, 0, 1, 0, 34, 23, 6, -7, -4, -2, -1, 0, 108, -5, -30, 6, -27, 10, 7, -2, 11, -3, -1, 1, -4, 1, 0, 1 }, + { 6, 9, -2, 35, 110, -22, 11, -4, -2, 0, -3, 1, -18, 12, -3, 2, -5, -4, -22, 8, -25, 3, 0, 0, -3, -21, 2, -3, 9, -2, 1, 0, -7, 1, 3, -5, 3, 0, -1, 0, 0, 1, 0, -1, 1, 0, 0, 0 }, + { -1, 7, -2, 9, -11, 5, -1, 1, -7, 2, -22, 4, -13, 0, -1, 0, 0, 28, 0, 76, 4, -6, 0, -2, -13, 5, -76, -4, 33, -1, 3, 0, 9, 18, -3, -35, -4, -1, 6, 1, 1, 2, 
0, -3, -1, 0, 2, 0 }, + }, + { + { -108, 48, 9, 1, 1, 1, 0, 0, 44, -6, -9, -1, -1, 0, -1, 0, 9, -9, -1, 1, 0, 0, 0, 0, 3, -1, 1, 0, 0, 0, 0, 0, 1, -1, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0 }, + { 55, 66, -37, -5, -6, -1, -2, 0, 67, -30, -20, 4, -2, 0, -1, 0, -31, -19, 14, 4, 1, 1, 1, 0, -6, 3, 5, -2, 0, 0, 0, 0, -7, -1, 1, 0, -1, 1, 1, 0, -2, -1, 1, 0, 0, 0, 0, 0 }, + { 2, 86, -21, -13, -4, -2, -1, -1, -88, 5, 6, 4, 5, 1, 1, 0, 14, -5, 0, 3, 0, 0, 0, 0, 10, -5, -2, 0, -1, 0, 0, 0, 6, -5, 0, 1, 2, -1, 0, 0, 1, -1, 0, 0, 1, 0, 0, 0 }, + { -24, -21, -38, 19, 0, 4, -1, 2, -23, -89, 31, 20, 2, 3, 1, 1, -30, 26, 36, -8, -2, -2, 0, -1, 14, 18, -7, -9, -1, -1, 0, 0, 1, 3, -2, -1, 3, 2, -2, -1, 0, 1, 0, 0, 1, 1, -1, 0 }, + { 9, 20, 98, -26, -3, -5, 0, -2, -9, -26, 15, -16, 2, 0, 1, 0, -61, -3, -2, 3, 7, 1, 1, 0, 12, 16, -6, -1, 0, -1, 0, 0, 2, 0, -8, 1, 3, 1, -1, 1, 0, -1, -2, 0, 1, 0, -1, 0 }, + { -21, -7, -37, 10, 2, 2, -1, 1, -10, 69, -5, -7, -2, -2, 0, -1, -93, 2, 19, 0, 3, 0, 2, 0, 17, 4, 0, 0, -1, 0, 0, 0, 5, -4, -2, 0, 4, -2, 0, 1, 0, 0, 0, 0, 2, -1, 0, 0 }, + { -10, -25, 4, -17, 8, -2, 2, -1, -27, -17, -71, 25, 8, 2, 1, 1, -4, -66, 28, 36, -5, 3, 0, 1, -10, 20, 33, -13, -8, 0, 0, -1, 3, 6, -3, -7, -1, 3, 3, -1, 1, 0, -1, 0, 0, 1, 1, -1 }, + { 2, 5, 10, 64, -9, 4, -3, 1, -4, 8, 62, 3, -17, 1, -2, 0, -3, -75, 5, -14, 1, 4, 0, 1, -36, 3, 18, -4, 4, 0, 1, 0, 1, 14, -2, -8, -2, 1, -3, 0, 2, 2, -1, -2, 0, 1, -1, 0 }, + { -11, -15, -28, -97, 6, -1, 4, -1, 7, 3, 57, -15, 10, -2, 0, -1, -1, -27, 13, 6, 1, -1, 0, 0, -34, -6, 0, 3, 4, 1, 2, 0, -2, 8, 1, 5, -2, 0, -3, 1, 1, 1, 0, 2, -1, 0, -1, 0 }, + { 9, 13, 24, -6, 7, -2, 1, -1, 16, 39, 20, 47, -2, -2, -2, 0, 28, 23, 76, -5, -25, -3, -3, -1, 6, 36, -7, -39, -4, -1, 0, -1, 2, -4, -18, -3, -1, -1, -2, -2, 1, -2, -2, 0, 0, 0, -1, -1 }, + { -7, 11, 12, 7, 2, -1, 0, -1, -14, -1, -24, 11, 2, 0, 0, 0, -20, 48, 11, -13, -5, -2, 0, -1, -105, -19, 17, 0, 6, 2, 3, 0, -14, 8, 8, 2, 1, 2, -1, -2, 3, 0, -1, 0, 0, 0, 0, 0 }, + { 0, 0, 
7, -6, 23, -3, 3, -1, 5, 1, 18, 96, 13, -9, -1, -1, -21, -7, -42, 14, -24, -3, 0, 0, 11, -47, -7, 3, -5, 9, 1, 2, 0, -1, 19, -1, 1, 0, -1, -6, -1, 1, 2, 0, 1, 0, 0, -2 }, + { -2, -6, -1, -10, 0, 1, 1, 0, -7, -2, -28, 20, -15, 4, -3, 1, -2, -32, -2, -66, 3, 7, 1, 2, -11, 13, -70, 5, 43, -2, 3, 0, 8, -14, -3, 43, -1, 2, 7, -1, 1, -2, 1, 3, -1, 1, 1, 0 }, + { -1, 6, -16, 0, 24, -3, 1, -1, 2, 6, 6, 16, 18, -7, 1, -1, -3, 11, -63, 9, 4, -5, 2, -1, -22, 94, -4, -6, -4, -4, 1, -2, 10, 23, -19, -5, 0, -6, -4, 6, 3, -2, 1, 1, 0, -1, 0, 0 }, + { -5, -6, -3, -19, -104, 18, -4, 3, 0, 6, 0, 35, -41, 20, -2, 2, -2, 10, -18, 16, 21, 3, -2, 0, -2, 11, 6, -10, 6, -3, -1, 0, -1, 5, -1, -6, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, -1 }, + { -1, -2, 0, 23, -9, 0, -2, 0, 1, 1, 8, -1, 29, 1, 1, 0, 3, -6, 13, 76, 30, -11, -1, -2, -26, -8, -69, 7, -9, -7, 3, -1, -10, -34, -25, 13, -1, 0, 11, 5, 1, -1, 1, -2, 0, 0, 2, 0 }, + } + }, + { //1 + { + { 110, -49, -3, -4, -1, -1, 0, -1, -38, -1, 10, 0, 2, 0, 1, 0, -9, 13, 1, -2, 0, 0, 0, 0, -4, 2, -3, 0, 0, 0, 0, 0, -2, 2, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 0, 0, 0 }, + { -43, -19, 17, -1, 3, 0, 1, 0, -98, 46, 14, -1, 2, 0, 1, 0, 26, 26, -15, -3, -2, -1, -1, 0, 11, -7, -9, 2, 0, 0, 0, 0, 9, -3, -1, 2, 3, -3, 0, 0, 4, -1, 0, 0, 2, -1, 0, 0 }, + { -19, 17, -7, 3, -2, 1, -1, 0, -32, -59, 29, 3, 4, 0, 2, 0, -72, 43, 34, -9, 3, -2, 1, -1, 13, 36, -18, -10, 0, -2, 0, -1, 3, 0, -12, 3, 6, 1, -3, 2, 1, -1, -2, 0, 3, 1, -1, 1 }, + { -35, -103, 39, 1, 7, 0, 2, 0, 38, -13, 25, -6, 1, -1, 0, 0, -1, 7, 6, -7, 1, -1, 0, 0, -13, 14, 2, -4, 2, -1, 0, 0, -2, 11, -6, -2, -2, 4, -3, 0, 0, 3, -2, 0, -1, 1, -1, 0 }, + { 9, 5, -6, -1, -1, 0, -1, 0, 42, 4, 21, -11, 1, -3, 1, -1, 21, 70, -32, -21, 0, -4, -1, -1, 34, -26, -57, 11, 4, 2, 0, 1, -4, -32, 5, 24, 1, -6, 12, 4, -3, -2, 4, -2, 0, -1, 0, 0 }, + { -5, -5, -28, 9, -3, 2, -1, 1, -20, -78, 22, 16, 1, 3, 0, 1, 80, -6, 25, -5, -4, -1, -1, 0, 6, -24, 7, -9, 0, 0, 0, 0, -7, 3, 13, -4, -3, 5, 1, -5, -2, 3, 1, -2, -1, 2, 
-1, -2 }, + { 14, 17, 27, -12, 1, -3, 1, -1, 8, 19, -13, 4, -2, 1, -1, 0, 48, -1, 48, -15, -4, -2, -1, -1, 1, 60, -28, -42, 5, -6, 1, -2, 11, -11, -51, 11, -2, -10, -2, 13, 2, -6, -4, 4, -2, -3, 2, 2 }, + { 7, 35, 17, -4, -1, 0, 0, 0, 3, 8, 54, -17, 1, -2, 1, -1, 10, 14, -11, -34, 4, -4, 1, -1, -80, -7, -6, 2, 15, 0, 3, 0, -16, 46, 1, 3, 2, 7, -24, 0, 2, -2, -5, 8, 1, -1, -2, 2 }, + { -13, -27, -101, 24, -8, 6, -3, 2, 11, 43, 6, 28, -6, 3, -1, 1, -3, 14, 21, -12, -7, -2, -1, -1, -23, 10, -4, -12, 3, 0, 1, 0, 2, 9, -10, 0, 1, -5, -4, 4, 2, -2, 2, 2, 0, -2, 1, 0 }, + { -11, -13, -3, -10, 3, -1, 1, 0, -19, -19, -37, 8, 4, 2, 0, 1, -12, -30, 3, -9, 5, 0, 1, 0, -56, -9, -47, 8, 21, 1, 4, 1, -11, -30, 10, 59, -2, 8, 41, 8, 2, 5, 6, -7, -1, 3, 5, -2 }, + { -4, -10, -24, -11, 3, -2, 0, -1, -6, -37, -45, -17, 8, -2, 2, -1, 17, 14, -58, 14, 15, 0, 2, 0, -10, 34, -7, 28, 4, -1, 1, 0, 23, 34, -31, 4, 10, -22, -30, 22, 4, -15, 9, 20, 2, -5, 9, 4 }, + { -2, 1, 13, -17, 3, -5, 1, -2, 3, 0, -55, 22, 6, 1, 1, 0, 8, 74, 21, 40, -14, 0, -2, 0, -36, -8, 11, -13, -23, 1, -3, 0, -36, 6, 16, -14, 2, 19, -4, -12, -1, 0, -7, -3, 0, 2, -2, -1 }, + { 3, 1, 5, -15, 1, -2, 1, -1, 7, 4, -7, 29, -1, 2, -1, 1, 8, 3, 12, -14, -9, -1, -1, 0, 4, 29, -15, 31, 10, 4, 1, 1, 61, 22, 55, 14, 13, 3, -9, -65, 1, -11, -21, -7, 0, 0, -1, 3 }, + { -4, -8, -1, -50, 6, -4, 2, -2, -1, 5, -22, 20, 6, 1, 0, 0, -16, -15, 18, -29, -11, 2, -2, 1, 40, -45, -19, -22, 31, 2, 4, 1, -25, 41, 0, 12, 9, 7, -42, 12, -3, -14, 2, 28, 5, 1, 6, 2 }, + { 5, -1, 26, 102, -13, 12, -4, 4, -4, -2, -40, -7, -23, 3, -5, 1, -1, 5, 8, -23, 7, 2, 1, 1, 10, -11, -13, -3, 12, -3, 2, 0, -9, 23, 4, 9, 14, 9, -14, -4, 0, -12, -7, 6, 3, 0, 6, 3 }, + { -5, -6, -27, -22, -12, 0, -3, 0, -5, 8, -20, -83, 0, 0, 0, 0, 9, 7, 24, -20, 41, 3, 6, 1, 15, 20, 12, 11, 17, -9, 1, -2, -26, -1, 18, -1, -12, 32, 3, -18, -5, 10, -25, -5, -2, 1, -8, 10 }, + }, + { + { 80, -49, 6, -4, 1, -1, 1, -1, -72, 36, 4, 0, 1, 0, 0, 0, 26, 0, -12, 2, -2, 1, -1, 0, -7, -9, 6, 
1, 0, 0, 0, 0, 3, 5, -1, -2, -2, -2, -1, 1, 1, 1, 0, 0, -1, -1, 0, 0 }, + { -72, -6, 17, 0, 3, 0, 1, 0, -23, 58, -21, 2, -3, 1, -1, 0, 55, -46, -1, 6, -2, 1, -1, 0, -22, 7, 17, -7, 2, -1, 1, 0, 9, 5, -12, 1, -3, -4, 4, 2, 4, 1, -2, -1, -1, -1, 1, 0 }, + { -50, 19, -15, 4, -1, 1, -1, 1, -58, -2, 30, -3, 4, -1, 2, 0, 6, 57, -34, 0, -2, 0, -1, 0, 34, -48, -2, 14, -4, 3, -1, 1, -10, 7, 21, -10, 6, 1, -11, 0, -1, -1, 4, 2, 3, 0, -2, -1 }, + { -33, -43, 28, -7, 4, -2, 2, -1, -38, 11, -8, 4, 1, 1, 0, 0, -55, 24, 26, -5, 2, -1, 1, 0, 15, 46, -40, -1, -1, 0, -1, 0, 17, -38, 1, 17, -3, 11, 15, -11, 3, -1, -10, 1, 0, 1, 3, 2 }, + { 10, 66, -21, -3, -3, 0, -1, 0, -53, -41, -2, 16, -1, 4, -1, 1, 36, -5, 41, -20, 3, -3, 1, -1, -30, 26, -32, -3, 7, -2, 2, -1, 15, -8, 1, 17, -1, -2, 4, -8, 2, 0, -1, 3, 0, 0, 0, -1 }, + { 18, 14, 13, -9, 2, -2, 1, -1, 34, 32, -31, 12, -5, 2, -2, 1, 40, 4, -4, -9, -3, -2, -1, -1, 27, -31, -43, 19, -2, 3, -1, 1, 7, -49, 52, 10, -11, 22, 7, -26, -1, -6, -9, 6, -2, 2, 4, -2 }, + { 21, 66, -1, 9, -4, 2, -1, 1, -21, 41, -30, -10, 0, -2, 0, -1, -35, -17, -3, 26, -6, 5, -2, 2, 56, 3, 18, -25, -1, -2, -1, -1, -15, -13, -27, 9, 9, -6, 20, 5, -3, 2, -6, -9, 3, -3, 1, 5 }, + { 1, -6, -24, 17, -5, 3, -2, 1, 24, 10, 39, -21, 5, -4, 2, -1, 33, 32, -30, 4, -3, -1, -1, 0, -4, 13, -16, -10, 0, -1, 0, 0, 24, -26, -37, 33, 5, -32, 55, -5, -7, 22, -14, -22, 1, -9, -3, 13 }, + { 9, 33, -24, 1, 4, 0, 1, 0, 6, 50, 26, 1, -10, 0, -2, 0, -27, 1, -28, -21, 16, -5, 3, -2, -23, 36, -2, 40, -17, 4, -3, 1, 43, -13, 4, -41, -19, -2, -24, 17, 11, -4, 8, 4, -3, -3, -3, -3 }, + { -7, -9, -32, 14, -3, 3, -1, 1, -23, -28, 0, -5, -1, 0, 0, 0, -36, -59, -24, 14, 4, 2, 1, 1, -23, -26, 23, 26, -3, 5, 0, 2, 10, -26, 38, 7, -12, 11, 42, -22, -5, 20, -14, -15, -1, -2, 1, 6 }, + { 6, 30, 69, -18, 5, -4, 3, -1, -3, -11, -34, -16, 9, -4, 2, -1, -16, 35, -35, 30, -9, 3, -2, 1, -57, -13, 6, 4, -5, 5, -1, 1, 28, 10, 4, 7, 0, -15, 7, -10, -1, 7, -2, 2, 1, -3, 0, 0 }, + { 1, -8, 24, -3, 7, -2, 2, 
-1, -6, -51, -6, -4, -5, 0, -1, 0, 38, -1, 0, 25, 6, 2, 1, 1, 47, 20, 35, 1, -27, 1, -5, 0, 37, -37, -9, -47, -28, 5, 0, 18, 8, 6, 0, -8, -4, -3, -3, 1 }, + { 4, 10, 4, 17, -9, 4, -2, 1, 5, 14, 32, -15, 9, -3, 2, -1, 7, 13, 19, 15, -8, 1, -1, 0, 3, 25, 30, -18, 1, -2, 0, -1, 11, 24, 22, -11, -3, 37, -13, -58, -5, 12, -63, 26, 9, -15, 11, 8 }, + { -3, -9, -23, 10, -10, 3, -3, 1, -5, -14, -16, -27, 13, -5, 2, -1, -1, -13, -30, 11, -5, 2, -1, 0, -5, -8, -22, -16, 10, 0, 1, 0, 0, -29, -27, 6, -27, -10, -30, 9, -3, -10, -7, 77, 9, -13, 45, -8 }, + { 2, 11, 22, 2, 9, -2, 2, 0, -6, -7, 20, -32, -3, -4, 0, -1, 13, -5, -28, 6, 18, -4, 3, -1, -26, 27, -14, 6, -20, 0, -2, 0, -76, -26, -4, -7, 12, 51, 5, 24, 7, -17, -16, -12, -5, 4, 2, 13 }, + { 2, -3, 8, 14, -5, 3, -1, 1, -2, -11, 5, -18, 8, -3, 2, -1, 12, -23, -19, 22, 2, 0, 1, 0, 23, 41, -7, 35, -10, 4, -1, 1, 5, 7, 23, 5, 69, -38, -8, -32, -15, -31, 24, 11, 2, 18, 11, -15 }, + } + }, + { //2 + { + { -121, 33, 4, 4, 1, 2, 0, 1, -1, -1, 1, 0, 0, 0, 0, 0, 24, -5, -1, -1, 0, 0, 0, 0, 5, -1, 0, 0, 0, 0, 0, 0, 3, -1, 0, 0, 2, -1, 0, 0, 2, -1, 0, 0, 1, 0, 0, 0 }, + { 0, -2, 0, 0, 0, 0, 0, 0, 121, -23, -7, -3, -2, -1, -1, 0, 17, 1, -2, 0, 0, 0, 0, 0, -27, 4, 2, 0, 0, 0, 0, 0, -12, 2, 1, 0, -5, 1, 0, 0, -1, 0, 0, 0, -2, 0, 0, 0 }, + { -20, 19, -5, 2, -1, 1, 0, 0, 16, 3, -2, 0, 0, 0, 0, 0, -120, 14, 8, 1, 3, 1, 1, 0, -18, -2, 3, 0, 1, 0, 0, 0, 17, -3, -1, 0, 6, -1, -1, 0, 2, 0, 0, 0, 2, 0, 0, 0 }, + { 32, 108, -43, 10, -9, 3, -3, 1, 4, 19, -7, 1, -1, 0, 0, 0, 11, -30, 9, -2, 1, -1, 0, 0, 0, -8, 2, 0, 0, 0, 0, 0, -7, -1, 2, 0, -3, -1, 1, 0, -2, -2, 1, 0, 0, 0, 0, 0 }, + { -3, 0, -1, 0, 0, 0, 0, 0, -29, 11, -2, 1, 0, 0, 0, 0, 12, 7, -1, 0, 0, 0, 0, 0, -117, 12, 9, 1, 3, 0, 1, 0, -32, -3, 3, 0, 12, -2, -1, 0, 7, 0, 0, 0, 1, 0, 0, 0 }, + { -4, -12, -3, 1, -1, 0, 0, 0, 19, 105, -31, 7, -6, 1, -2, 0, 9, 46, -6, 0, 0, 0, 0, 0, 8, -29, 9, -3, 1, 0, 0, 0, -3, -19, 3, 0, -4, -6, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, + { 7, 1, 2, 0, 0, 0, 0, 0, 4, 
3, -2, 0, 0, 0, 0, 0, 22, -8, 1, -1, 0, 0, 0, 0, -28, -9, 4, 0, 1, 0, 0, 0, 117, -10, -8, 0, 32, 1, -4, 0, 3, 1, -1, 0, -3, 1, 0, 0 }, + { -8, -31, 14, -4, 3, -1, 1, 0, 9, 43, 0, 1, -1, 0, 0, 0, -13, -105, 17, -2, 2, 0, 0, 0, -8, -25, -3, 0, 0, 0, 0, 0, -7, 32, -5, 1, -1, 4, 0, 0, 2, -1, 0, 0, 1, 0, -1, 0 }, + { -15, -43, -100, 23, -12, 6, -4, 2, -6, -17, -48, 10, -5, 2, -1, 1, 1, -5, 19, -6, 3, -1, 1, 0, 2, 7, 15, -3, 1, -1, 0, 0, 4, 10, 5, -1, 0, 3, 1, 0, -2, 1, 2, 0, -1, 1, 1, 0 }, + { -3, 1, 2, 0, 0, 0, 0, 0, -6, 3, 1, 0, 0, 0, 0, 0, 0, 3, -2, 0, 0, 0, 0, 0, -20, 8, -2, 0, 0, 0, 0, 0, 30, 13, -3, 0, -116, 6, 10, 0, -35, -5, 4, 0, -3, -1, 0, 0 }, + { -1, -6, -3, 2, -1, 0, 0, 0, -6, -35, 9, 0, 2, 0, 0, 0, 1, -6, 11, -2, 2, 0, 1, 0, -9, -100, 17, -1, 1, 0, 0, 0, -10, -63, 1, 2, -17, 3, -4, 0, -1, 9, -1, 0, 3, 4, -1, 0 }, + { -5, -14, -48, 2, -5, 1, -2, 0, 10, 24, 99, -17, 10, -4, 3, -1, 4, 14, 32, 0, 2, 0, 1, 0, -4, 0, -39, 6, -4, 1, -1, 0, 2, -3, -4, 0, 2, -2, -2, 0, 0, 0, -1, 0, 0, -1, -1, 0 }, + { -2, 0, 2, 0, 0, 0, 0, 0, -2, 0, 1, 0, 0, 0, 0, 0, -1, -1, 1, -1, 0, 0, 0, 0, -1, -4, 2, 0, 0, 0, 0, 0, -8, -2, -1, 1, 30, 4, -4, 1, -102, 4, 8, -1, -69, -2, 6, -1 }, + { -2, -10, -4, 0, 0, 0, 0, 0, 3, 11, -1, -1, 0, 0, 0, 0, -6, -40, -15, 6, -2, 1, 0, 0, 5, 57, -6, 2, 0, 0, 0, 0, 1, -95, 18, -6, -10, -34, -2, 0, -4, 17, -2, 0, 0, 2, 1, 0 }, + { -2, -3, -25, -2, -3, 0, -1, 0, -1, -3, -1, 4, -2, 2, 0, 1, -7, -8, -97, 17, -9, 3, -3, 1, -8, -26, -61, -1, -3, -1, -1, -1, 2, 10, 24, -7, 5, 9, 19, -1, 0, 1, 4, 0, -2, 0, 1, 0 }, + { 4, -4, 28, 103, -42, 24, -9, 7, 1, 2, 4, 0, 3, -1, 0, 0, -1, 0, -9, -42, 17, -9, 3, -2, -1, 1, -14, 6, -4, 2, -1, 0, -1, -2, -4, 4, 0, 3, 1, -1, 0, 2, 0, -2, 2, 0, 0, 0 }, + }, + { + { 87, -41, 3, -4, 1, -1, 0, -1, -73, 28, 2, 1, 1, 1, 0, 0, 30, -5, -6, 1, -1, 0, 0, 0, -8, -3, 3, 0, 0, 0, 0, 0, 3, 2, -1, 0, -2, -1, 0, 0, 1, 1, 0, 0, -1, 0, 0, 0 }, + { -75, 4, 7, 0, 2, 0, 1, 0, -41, 36, -7, 3, -1, 1, 0, 0, 72, -29, -2, 0, -1, 0, -1, 0, -37, 6, 7, 
-2, 1, 0, 0, 0, 12, 3, -4, 0, -3, -2, 1, 0, 4, 0, 0, 0, -1, 0, 0, 0 }, + { 26, -44, 22, -6, 4, -2, 1, -1, 77, 24, -22, 2, -4, 0, -1, 0, 7, -38, 10, 0, 1, 0, 0, 0, -51, 27, 4, -3, 2, -1, 1, 0, 31, -5, -8, 3, -14, 0, 5, -1, 6, 1, -3, 0, -4, -1, 1, 0 }, + { -39, -68, 37, -7, 6, -2, 2, 0, -9, 56, -21, 1, -2, 0, -1, 0, -45, 4, -3, 6, -1, 2, 0, 1, 49, -13, 3, -3, -1, 0, 0, 0, -19, 2, 0, 0, 5, 1, 1, 0, -2, 0, -1, 0, 1, 0, 0, 0 }, + { 10, -20, 2, 0, 1, 0, 0, 0, 50, -1, 8, -5, 1, -1, 0, 0, 66, 17, -24, 4, -3, 1, -1, 0, 13, -49, 15, 1, 0, 0, 0, 0, -53, 34, 6, -5, 30, -7, -11, 3, -11, -2, 5, 1, 4, 2, -1, -1 }, + { -21, -45, 8, -2, 3, -1, 1, 0, -7, -30, 26, -8, 3, -1, 1, -1, -9, 69, -33, 5, -2, 0, -1, 0, -44, -31, 10, 7, -2, 2, 0, 1, 49, 7, 2, -6, -23, -3, -2, 2, 9, 4, 0, 0, -2, -1, -1, 0 }, + { -4, -2, -55, 28, -8, 5, -3, 2, -2, 37, 43, -19, 1, -2, 1, -1, -47, -34, -27, 5, 4, -1, 1, 0, -39, -2, 27, 4, -2, 1, 0, 0, -11, 32, -8, -7, 27, -12, -6, 6, -13, 0, 4, -3, 3, -1, -2, 1 }, + { 2, 19, 47, -23, 6, -4, 2, -1, -23, -22, -44, 17, -2, 2, -1, 0, -33, 3, 22, -2, -4, 1, -1, 0, -58, -17, 6, -6, 7, -1, 1, 0, -23, 40, -2, 5, 43, -11, -8, -1, -18, -4, 5, 2, 4, 3, 0, -1 }, + { -19, -62, -9, 3, 0, 0, 0, 0, -12, -56, 27, -7, 3, -1, 1, 0, 7, -8, 16, -6, 4, -2, 1, -1, -15, 54, -23, 2, -1, 0, 0, 0, -42, -25, 4, 6, 34, 8, 2, -2, -15, -1, 0, -1, 3, 2, 0, 1 }, + { 1, 9, -5, 0, -1, 0, 0, 0, 0, 22, -1, 2, 0, 1, 0, 0, -13, 17, 0, -2, 0, -1, 0, 0, -46, -10, -10, 4, -1, 1, 0, 0, -80, -27, 20, -4, -66, 23, -2, -2, 20, -3, -2, 3, -14, 2, 3, -1 }, + { 5, 17, -9, 0, -2, 1, 0, 0, 13, 54, -2, 7, -1, 1, 0, 0, 4, 51, -3, -6, -1, -1, 0, 0, -20, 6, -34, 9, -2, 2, -1, 0, 16, -52, 28, 1, 59, 15, -8, -5, -28, -7, 2, 2, 10, 3, 0, -1 }, + { 7, 27, 56, -2, 10, -3, 3, -1, -2, -6, 8, -28, 3, -4, 1, -1, -1, -4, -68, 35, -5, 5, -2, 1, 0, 35, 43, -4, -6, 1, -1, 0, -14, -38, -12, -10, 9, 5, 7, 6, -9, 7, -4, -3, 4, -4, 0, 3 }, + { 0, 0, 19, -4, 3, -2, 2, -1, -3, -13, 10, -4, 1, 0, 0, 0, -6, -37, -18, -5, 2, -2, 1, -1, 6, 
-6, -7, 25, -6, 4, -1, 1, 16, 10, 55, -24, 15, 46, -52, 1, 35, -43, 10, 12, -23, 13, 5, -8 }, + { -3, 0, -27, -80, 40, -16, 6, -4, 4, 3, 31, 61, -22, 7, -1, 1, -4, -7, -26, -6, -10, 6, -4, 1, 3, 8, 14, -18, 15, -5, 2, -1, -2, -4, -1, 13, 0, 2, -4, -3, 3, -1, 2, 1, -2, 0, -2, -1 }, + { 1, 2, -8, 6, -1, 1, 0, 0, 2, 8, -5, -1, 0, 0, 0, 0, 1, 24, 3, 5, -1, 1, 0, 0, -3, 12, 6, -10, 1, -1, 0, 0, -9, -1, -25, 10, 45, -11, 18, 2, 86, 1, -13, -4, -65, -6, 7, 2 }, + { -4, -18, -57, 8, -8, 1, -3, 0, -5, -20, -69, 7, -6, 2, -2, 1, 1, 4, 0, 33, -7, 5, -2, 1, 0, -9, 53, -22, 3, -1, 0, 0, 4, -27, -2, -9, 5, 36, -13, 5, -7, -17, 1, 2, 4, 6, 4, -1 }, + } + }, + { //3 + { + { -115, 37, 9, 2, 2, 1, 1, 0, 10, -29, 8, 0, 1, 0, 1, 0, 23, -8, -8, 1, -1, 0, 0, 0, 3, 3, -2, -1, 0, 0, 0, 0, 4, 0, 0, -1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 }, + { 15, 51, -18, 0, -3, 0, -1, 0, -95, 7, 34, -3, 5, -1, 2, 0, 23, -47, 1, 6, 0, 1, 0, 1, 8, 5, -12, 0, -1, 0, 0, 0, 3, -3, 1, -1, 2, 1, -2, 0, 1, -1, 0, 0, 1, 1, -1, 0 }, + { 29, -22, 16, -6, 3, -2, 1, -1, -4, -80, 12, 15, 0, 3, 0, 1, 45, 7, -59, 7, -2, 1, -1, 0, -15, 41, -3, -16, 2, -3, 0, -1, 1, 0, 7, -2, -3, 6, 1, -2, 0, 0, 1, 0, -1, 2, 0, -1 }, + { -36, -98, 25, 5, 4, 1, 2, 1, -59, 11, -17, 1, 1, 1, 0, 0, 6, -13, 7, -3, 0, 0, 0, 0, 14, -4, -14, 3, -1, 0, 0, 0, 2, 8, -3, -5, 2, 0, 0, 0, 0, 3, 0, -1, 1, 0, 0, 0 }, + { -6, 18, 3, -3, -1, 0, 0, 0, -50, -5, -38, 12, 0, 2, 0, 1, 3, 67, -7, -40, 3, -6, 1, -3, -12, -13, 65, -3, -10, 0, -1, 0, 9, -20, -5, 22, -2, 0, 0, -1, 2, -3, -2, 3, -1, 0, 1, 0 }, + { 4, 15, 52, -13, 5, -3, 2, -1, -17, -45, 16, 24, -2, 4, -1, 2, -87, -8, -14, 7, 8, 1, 2, 0, 23, -35, -6, -3, 1, 1, 0, 0, 2, 5, -17, 0, 3, -1, -1, -5, 0, 1, -4, 0, 1, 0, 0, -2 }, + { -20, -7, -43, 4, 0, 1, -1, 1, -7, 35, 0, 12, -4, 1, -1, 0, -51, -2, -57, 5, 15, 0, 4, 0, 7, 39, 5, -55, 1, -7, 1, -3, 1, -10, 41, 2, 4, -3, -2, 3, -1, -2, 7, 1, 1, -1, -1, 0 }, + { 4, 29, 1, 26, -5, 4, -2, 1, -17, -7, -73, 6, 6, 2, 1, 1, -5, 21, -3, 5, -1, -3, 0, -1, -11, 2, -52, 
-3, 27, -2, 5, 0, 0, 27, 8, -58, 2, -5, 25, 3, 0, 3, 0, -5, 0, -2, 7, 0 }, + { 12, 13, 10, 2, -1, 3, -1, 1, 17, -2, -46, 12, 7, 0, 2, 0, 16, -45, -9, -53, 6, 1, 1, 0, 70, 16, 8, -4, -37, 1, -7, 0, -12, 29, 3, 21, 4, 0, 5, -1, -3, 4, 1, 4, 2, 0, 1, 0 }, + { 5, 20, 90, -17, 4, -3, 2, -1, 6, 66, 8, 28, -7, 3, -1, 1, 29, 5, -19, 12, 9, -1, 1, 0, -10, 14, -1, -13, 7, 0, 1, 0, 0, -6, 13, -4, 0, -4, 1, 5, 0, -1, -1, 1, 0, -1, 0, 0 }, + { -3, -4, -34, -12, 2, -1, -1, 0, 5, 25, 11, 43, -10, 4, -2, 1, 23, 20, -40, 12, 21, -3, 4, -1, 25, -28, -10, 5, 8, 6, 0, 2, -4, 21, -64, -8, -5, 19, 10, -48, 3, -1, 10, -3, 0, 4, 3, -6 }, + { -1, -3, 2, 19, -2, 4, -1, 2, 9, 3, -35, 22, 11, 1, 2, 0, -7, -65, -19, -22, 11, 4, 2, 1, -75, -18, 3, -1, -10, 2, 0, 1, 2, -35, -27, 4, 1, 8, -17, -19, 3, 0, 3, -6, 0, 2, -1, -2 }, + { 10, -4, -6, 12, 5, 1, 1, 0, 11, -9, -12, -2, -7, 0, -1, 0, 33, -10, -4, 18, 18, -4, 4, -1, 28, -72, 1, -49, 15, 2, 2, 1, 56, -23, 22, -1, 4, -1, -15, 26, 6, 4, -10, 0, 0, 2, -3, 2 }, + { 4, 6, 14, 53, -4, 4, 0, 2, 0, -1, -20, -13, 3, 2, -1, 1, -3, 1, -5, 35, -16, -6, -1, -2, 46, 29, 13, 21, 37, -5, 4, -1, -10, -53, -18, 8, 9, 12, -41, -25, -2, 2, 13, -16, 4, 1, -5, 1 }, + { 2, 9, 13, 37, 19, 6, 2, 2, -9, -3, -9, -28, -20, -4, -3, -1, 1, 18, 9, 28, 24, 6, 2, 2, -20, -5, -25, -33, -36, 9, -2, 2, -13, 42, 1, 57, -22, -2, -25, -28, 5, 6, 19, -12, -5, -3, -2, 4 }, + { 3, -3, 12, 84, -12, 8, -2, 3, 6, 13, 50, -1, 45, 1, 7, 0, -2, 18, -22, -37, -13, 14, 0, 3, 1, -12, -3, 2, -15, -8, 1, -1, 19, 14, -4, -12, -4, 5, 17, 8, 2, -4, -4, 4, -2, 2, 1, 0 }, + }, + { + { 109, -26, -8, -3, -2, -1, -1, 0, -50, 28, 2, 1, 0, 0, 0, 0, -18, -8, 6, 0, 1, 0, 1, 0, 6, -2, -3, 0, 0, 0, 0, 0, -3, 2, 1, -1, 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0 }, + { -39, 31, -5, 2, -1, 1, 0, 0, -95, 6, 18, 0, 4, 0, 1, 0, 32, -49, 5, 1, 1, 0, 0, 0, 27, -1, -14, 2, -2, 1, -1, 0, 3, 5, -3, -2, 4, 1, -1, -1, 2, 0, 0, 0, 2, 0, 0, 0 }, + { 29, -3, -2, -2, 0, 0, 0, 0, 0, -41, 9, 0, 2, 0, 1, 0, 86, 4, -33, 2, -6, 1, -2, 0, 
-32, 58, 1, -7, 0, -2, 0, -1, -14, -8, 20, 0, -2, -3, 0, 4, -1, -1, 0, 0, -1, 1, 0, 0 }, + { 18, 96, -23, 2, -5, 1, -2, 0, -10, 6, 10, -2, 1, -1, 1, 0, -14, 26, 2, -4, 1, -1, 0, 0, -43, -9, 35, -2, 4, -1, 1, 0, 14, -40, 1, 10, 2, 1, -10, 1, 2, -4, -1, -1, 0, 0, -1, 0 }, + { -29, -60, 16, -2, 3, -1, 1, 0, -52, 9, -17, 5, -2, 1, -1, 1, 13, 56, -2, -9, 0, -2, 0, -1, -34, -18, 41, 0, 3, 0, 1, 0, 19, -36, -10, 13, 3, 6, -14, -1, 3, 1, -1, -3, 1, 1, -1, -1 }, + { -23, -5, -15, 5, -2, 1, -1, 1, 2, 79, -13, -4, -2, -1, -1, 0, -9, 1, 5, -1, 1, 0, 0, 0, -4, 49, 2, -14, 1, -3, 0, -1, -31, -14, 56, -1, 13, -37, -4, 20, -2, 2, -10, 0, 2, -4, 0, -1 }, + { -7, -3, 12, -3, 3, -1, 1, 0, -31, -62, 8, 7, 0, 2, 0, 1, -75, 9, -45, 5, -1, 1, -1, 0, 14, 35, 0, -23, 2, -5, 1, -2, 1, -8, 32, -1, 7, -12, -4, 10, 0, 2, -6, -1, 2, 0, 0, -2 }, + { 1, -26, 5, 0, 1, 0, 1, 0, 24, -3, 43, -6, 4, -2, 1, -1, -7, -64, 9, 14, 0, 3, 0, 1, -12, -4, 5, 3, -1, 1, 0, 0, 8, -59, -3, 26, 14, 6, -58, 6, -5, 17, -7, -18, 3, 3, -1, -5 }, + { 11, 14, 6, -3, 1, -1, 1, 0, 10, -7, -9, 3, -2, 1, -1, 0, 22, 21, 1, -21, 2, -4, 1, -2, 92, 1, 53, 0, -9, 1, -2, 0, -21, -11, 1, 40, -5, -4, -24, 5, -4, 5, -6, -5, 0, 0, 0, -3 }, + { -10, -11, -47, 3, -4, 1, -1, 0, 5, 28, 11, -2, -1, 0, 0, 0, -12, -2, -38, 2, 0, 1, 0, 0, 16, 38, 11, -16, -1, -3, 0, -2, 12, -9, -22, 7, -8, 60, 4, -36, -6, -15, 54, 7, 3, -7, -8, 14 }, + { -8, -24, -99, 11, -10, 3, -4, 1, -5, -36, 19, -26, 4, -5, 1, -2, 0, 25, 41, 5, -3, 1, 0, 0, 10, -5, -7, 12, 2, 1, 0, 0, -1, 1, 9, -3, -3, -14, -3, 12, 2, 4, -13, -2, -1, 3, 2, -4 }, + { -5, 1, -1, 0, 1, 0, 0, 0, -10, -14, -6, 8, 0, 1, 0, 0, -17, -2, 7, -5, 3, -1, 0, 0, -16, 13, 3, 31, -1, 6, 0, 2, -93, -15, -46, -3, 23, -19, 0, -47, 8, 4, 8, 3, 2, 3, 0, 0 }, + { 1, 12, -20, 21, -4, 5, -2, 2, -5, -2, -75, 9, -1, 2, -1, 1, -1, -2, -16, -4, 0, -1, 0, 0, -7, 7, -31, 0, 3, 0, 0, 0, 4, 11, -12, 4, -12, 14, -50, -1, -8, 32, -4, -54, 2, 0, 30, -15 }, + { 2, -9, -18, 8, -3, 3, -1, 1, 3, -25, -62, -6, 0, -2, 0, -1, -6, 
-61, 14, -51, 2, -6, 0, -2, -19, 0, 40, -7, -17, 0, -3, 0, 13, -4, 11, 9, 17, 0, 24, 5, 1, -12, 4, 28, 0, 0, -15, 8 }, + { 4, 9, 39, 18, 0, 2, 0, 1, -6, -16, -22, -37, 5, -5, 1, -2, -5, 15, 63, 9, -16, 0, -3, 0, 18, 42, -18, 27, 15, 1, 3, 1, 12, -34, 9, -24, 4, 28, -2, 4, -11, -4, 30, 2, 5, -13, -4, 18 }, + { -7, -2, 15, -6, 1, -1, 1, -1, -11, -3, 22, -14, 0, -2, 1, -1, -18, -7, 30, -9, -4, 0, -1, 0, -35, 23, 23, 10, -17, 1, -3, 0, -19, 53, 6, 48, -65, 12, -12, 11, -8, -16, 10, -21, -2, -12, 6, 2 }, + } + } +}; + +const int8_t g_lfnst4x4[ 4 ][ 2 ][ 16 ][ 16 ] = { + { //0 + { + { 108, -44, -15, 1, -44, 19, 7, -1, -11, 6, 2, -1, 0, -1, -1, 0 }, + { -40, -97, 56, 12, -11, 29, -12, -3, 18, 18, -15, -3, -1, -3, 2, 1 }, + { 25, -31, -1, 7, 100, -16, -29, 1, -54, 21, 14, -4, -7, 2, 4, 0 }, + { -32, -39, -92, 51, -6, -16, 36, -8, 3, 22, 18, -15, 4, 1, -5, 2 }, + { 8, -9, 33, -8, -16, -102, 36, 23, -4, 38, -27, -5, 5, 16, -8, -6 }, + { -25, 5, 16, -3, -38, 14, 11, -3, -97, 7, 26, 1, 55, -10, -19, 3 }, + { 8, 9, 16, 1, 37, 36, 94, -38, -7, 3, -47, 11, -6, -13, -17, 10 }, + { 2, 34, -5, 1, -7, 24, -25, -3, 8, 99, -28, -29, 6, -43, 21, 11 }, + { -16, -27, -39, -109, 6, 10, 16, 24, 3, 19, 10, 24, -4, -7, -2, -3 }, + { -9, -10, -34, 4, -9, -5, -29, 5, -33, -26, -96, 33, 14, 4, 39, -14 }, + { -13, 1, 4, -9, -30, -17, -3, -64, -35, 11, 17, 19, -86, 6, 36, 14 }, + { 8, -7, -5, -15, 7, -30, -28, -87, 31, 4, 4, 33, 61, -5, -17, 22 }, + { -2, 13, -6, -4, -2, 28, -13, -14, -3, 37, -15, -3, -2, 107, -36, -24 }, + { 4, 9, 11, 31, 4, 9, 16, 19, 12, 33, 32, 94, 12, 0, 34, -45 }, + { 2, -2, 8, -16, 8, 5, 28, -17, 6, -7, 18, -45, 40, 36, 97, -8 }, + { 0, -2, 0, -10, -1, -7, -3, -35, -1, -7, -2, -32, -6, -33, -16, -112 }, + }, + { + { 119, -30, -22, -3, -23, -2, 3, 2, -16, 3, 6, 0, -3, 2, 1, 0 }, + { -27, -101, 31, 17, -47, 2, 22, 3, 19, 30, -7, -9, 5, 3, -5, -1 }, + { 0, 58, 22, -15, -102, 2, 38, 2, 10, -13, -5, 4, 14, -1, -9, 0 }, + { 23, 4, 66, -11, 22, 89, -2, -26, 13, -8, -38, -1, -9, 
-20, -2, 8 }, + { -19, -5, -89, 2, -26, 76, -11, -17, 20, 13, 18, -4, 1, -15, 3, 5 }, + { -10, -1, -1, 6, 23, 25, 87, -7, -74, 4, 39, -5, 0, -1, -20, -1 }, + { -17, -28, 12, -8, -32, 14, -53, -6, -68, -67, 17, 29, 2, 6, 25, 4 }, + { 1, -24, -23, 1, 17, -7, 52, 9, 50, -92, -15, 27, -15, -10, -6, 3 }, + { -6, -17, -2, -111, 7, -17, 8, -42, 9, 18, 16, 25, -4, 2, -1, 11 }, + { 9, 5, 35, 0, 6, 21, -9, 34, 44, -3, 102, 11, -7, 13, 11, -20 }, + { 4, -5, -5, -10, 15, 19, -2, 6, 6, -12, -13, 6, 95, 69, -29, -24 }, + { -6, -4, -9, -39, 1, 22, 0, 102, -19, 19, -32, 30, -16, -14, -8, -23 }, + { 4, -4, 7, 8, 4, -13, -18, 5, 0, 0, 21, 22, 58, -88, -54, 28 }, + { -4, -7, 0, -24, -7, 0, -25, 3, -3, -30, 8, -76, -34, 4, -80, -26 }, + { 0, 6, 0, 30, -6, 1, -13, -23, 1, 20, -2, 80, -44, 37, -68, 1 }, + { 0, 0, -1, 5, -1, -7, 1, -34, -2, 3, -6, 19, 5, -38, 11, -115 }, + } + }, + { //1 + { + { -111, 39, 4, 3, 44, 11, -12, -1, 7, -16, -5, 2, 3, -1, 4, 2 }, + { -47, -27, 15, -1, -92, 43, 20, -2, 20, 39, -16, -5, 10, -5, -13, 2 }, + { -35, -23, 4, 4, -17, -72, 32, 6, -59, 18, 50, -6, 0, 40, 0, -13 }, + { 13, 93, -27, -4, -48, 13, -34, 4, -52, 11, 1, 10, 3, 16, -3, 1 }, + { -11, -27, 1, 2, -47, -4, -36, 10, -2, -85, 14, 29, -20, -2, 57, 4 }, + { 0, -35, 32, -2, 26, 60, -3, -17, -82, 1, -30, 0, -37, 21, 3, 12 }, + { -17, -46, -92, 14, 7, -10, -39, 29, -17, 27, -28, 17, 1, -15, -13, 17 }, + { 4, -10, -23, 4, 16, 58, -17, 26, 30, 21, 67, 2, -13, 59, 13, -40 }, + { 5, -20, 32, -5, 8, -3, -46, -7, -4, 2, -15, 24, 100, 44, 0, 5 }, + { -4, -1, 38, -18, -7, -42, -63, -6, 33, 34, -23, 15, -65, 33, -20, 2 }, + { -2, -10, 35, -19, 5, 8, -44, 14, -25, 25, 58, 17, 7, -84, -16, -18 }, + { 5, 13, 18, 34, 11, -4, 18, 18, 5, 58, -3, 42, -2, -10, 85, 38 }, + { -5, -7, -34, -83, 2, -1, -4, -73, 4, 20, 15, -12, 4, -3, 44, 12 }, + { 0, 4, -2, -60, 5, 9, 42, 34, 5, -14, 9, 80, -5, 13, -38, 37 }, + { -1, 2, 7, -57, 3, -7, 9, 68, -9, 6, -49, -20, 6, -4, 36, -64 }, + { -1, 0, -12, 23, 1, -4, 17, -53, -3, 4, -21, 
72, -4, -8, -3, -83 }, + }, + { + { 88, -55, 6, -3, -66, 27, 9, -2, 11, 11, -13, 1, -2, -7, 1, 2 }, + { -58, -20, 27, -2, -27, 75, -29, 0, 47, -42, -11, 11, -9, -3, 19, -4 }, + { -51, 23, -22, 5, -63, 3, 37, -5, 1, 64, -35, -4, 29, -31, -11, 13 }, + { -27, -76, 49, -2, 40, 14, 9, -17, -56, 36, -25, 6, 14, 3, -6, 8 }, + { 19, -4, -36, 22, 52, 7, 36, -23, 28, -17, -64, 15, -5, -44, 48, 9 }, + { 29, 50, 13, -10, 1, 34, -59, 1, -51, 4, -16, 30, 52, -33, 24, -5 }, + { -12, -21, -74, 43, -13, 39, 18, -5, -58, -35, 27, -5, 19, 26, 6, -5 }, + { 19, 38, -10, -5, 28, 66, 0, -5, -4, 19, -30, -26, -40, 28, -60, 37 }, + { -6, 27, 18, -5, -37, -18, 12, -25, -44, -10, -38, 37, -66, 45, 40, -7 }, + { -13, -28, -45, -39, 0, -5, -39, 69, -23, 16, -12, -18, -50, -31, 24, 13 }, + { -1, 8, 24, -51, -15, -9, 44, 10, -28, -70, -12, -39, 24, -18, -4, 51 }, + { -8, -22, -17, 33, -18, -45, -57, -27, 0, -31, -30, 29, -2, -13, -53, 49 }, + { 1, 12, 32, 51, -8, 8, -2, -31, -22, 4, 46, -39, -49, -67, 14, 17 }, + { 4, 5, 24, 60, -5, -14, -23, 38, 9, 8, -34, -59, 24, 47, 42, 28 }, + { -1, -5, -20, -34, 4, 4, -15, -46, 18, 31, 42, 10, 10, 27, 49, 78 }, + { -3, -7, -22, -34, -5, -11, -36, -69, -1, -3, -25, -73, 5, 4, 4, -49 }, + } + }, + { //2 + { + { -112, 47, -2, 2, -34, 13, 2, 0, 15, -7, 1, 0, 8, -3, -1, 0 }, + { 29, -7, 1, -1, -108, 40, 2, 0, -45, 13, 4, -1, 8, -5, 1, 0 }, + { -36, -87, 69, -10, -17, -33, 26, -2, 7, 14, -11, 2, 6, 8, -7, 0 }, + { 28, -5, 2, -2, -29, 13, -2, 0, 103, -36, -4, 1, 48, -16, -4, 1 }, + { -12, -24, 15, -3, 26, 80, -61, 9, 15, 54, -36, 2, 0, -4, 6, -2 }, + { 18, 53, 69, -74, 14, 24, 28, -30, -6, -7, -11, 12, -5, -7, -6, 8 }, + { 5, -1, 2, 0, -26, 6, 0, 1, 45, -9, -1, 0, -113, 28, 8, -1 }, + { -13, -32, 18, -2, 15, 34, -27, 7, -25, -80, 47, -1, -16, -50, 28, 2 }, + { -4, -13, -10, 19, 18, 46, 60, -48, 16, 33, 60, -48, 1, 0, 5, -2 }, + { 15, 33, 63, 89, 8, 15, 25, 40, -4, -8, -15, -8, -2, -6, -9, -7 }, + { -8, -24, -27, 15, 12, 41, 26, -29, -17, -50, -39, 27, 0, 35, -67, 
26 }, + { -2, -6, -24, 13, -1, -8, 37, -22, 3, 18, -51, 22, -23, -95, 17, 17 }, + { -3, -7, -16, -21, 10, 24, 46, 75, 8, 20, 38, 72, 1, 2, 1, 7 }, + { 2, 6, 10, -3, -5, -16, -31, 12, 7, 24, 41, -16, -16, -41, -89, 49 }, + { 4, 8, 21, 40, -4, -11, -28, -57, 5, 14, 31, 70, 7, 18, 32, 52 }, + { 0, 1, 4, 11, -2, -4, -13, -34, 3, 7, 20, 47, -6, -19, -42, -101 }, + }, + { + { -99, 39, -1, 2, 65, -20, -5, 0, -15, -2, 5, -1, 0, 3, -1, 0 }, + { 58, 42, -33, 3, 33, -63, 23, -1, -55, 32, 3, -5, 21, -2, -8, 3 }, + { -15, 71, -44, 5, -58, -29, 25, 3, 62, -7, -4, -4, -19, 4, 0, 1 }, + { 46, 5, 4, -6, 71, -12, -15, 5, 52, -38, 13, -2, -63, 23, 3, -3 }, + { -14, -54, -29, 29, 25, -9, 61, -29, 27, 44, -48, 5, -27, -21, 12, 7 }, + { -3, 3, 69, -42, -11, -50, -26, 26, 24, 63, -19, -5, -18, -22, 12, 0 }, + { 17, 16, -2, 1, 38, 18, -12, 0, 62, 1, -14, 5, 89, -42, 8, -2 }, + { 15, 54, -8, 6, 6, 60, -26, -8, -30, 17, -38, 22, -43, -45, 42, -7 }, + { -6, -17, -55, -28, 9, 30, -8, 58, 4, 34, 41, -52, -16, -36, -20, 16 }, + { -2, -1, -9, -79, 7, 11, 48, 44, -13, -34, -55, 6, 12, 23, 20, -11 }, + { 7, 29, 14, -6, 12, 53, 10, -11, 14, 59, -15, -3, 5, 71, -54, 13 }, + { -5, -24, -53, 15, -3, -15, -61, 26, 6, 30, -16, 23, 13, 56, 44, -35 }, + { 4, 8, 21, 52, -1, -1, -5, 29, -7, -17, -44, -84, 8, 20, 31, 39 }, + { -2, -11, -25, -4, -4, -21, -53, 2, -5, -26, -64, 19, -8, -19, -73, 39 }, + { -3, -5, -23, -57, -2, -4, -24, -75, 1, 3, 9, -25, 6, 15, 41, 61 }, + { 1, 1, 7, 18, 1, 2, 16, 47, 2, 5, 24, 67, 3, 9, 25, 88 }, + } + }, + { //3 + { + { -114, 37, 3, 2, -22, -23, 14, 0, 21, -17, -5, 2, 5, 2, -4, -1 }, + { -19, -41, 19, -2, 85, -60, -11, 7, 17, 31, -34, 2, -11, 19, 2, -8 }, + { 36, -25, 18, -2, -42, -53, 35, 5, 46, -60, -25, 19, 8, 21, -33, -1 }, + { -27, -80, 44, -3, -58, 1, -29, 19, -41, 18, -12, -7, 12, -17, 7, -6 }, + { -11, -21, 37, -10, 44, -4, 47, -12, -37, -41, 58, 18, 10, -46, -16, 31 }, + { 15, 47, 10, -6, -16, -44, 42, 10, -80, 25, -40, 21, -23, -2, 3, -14 }, + { 13, 25, 79, -39, 
-13, 10, 31, -4, 49, 45, 12, -8, 3, -1, 43, 7 }, + { 16, 11, -26, 13, -13, -74, -20, -1, 5, -6, 29, -47, 26, -49, 54, 2 }, + { -8, -34, -26, 7, -26, -19, 29, -37, 1, 22, 46, -9, -81, 37, 14, 20 }, + { -6, -30, -42, -12, -3, 5, 57, -52, -2, 37, -12, 6, 74, 10, 6, -15 }, + { 5, 9, -6, 42, -15, -18, -9, 26, 15, 58, 14, 43, 23, -10, -37, 75 }, + { -5, -23, -23, 36, 3, 22, 36, 40, 27, -4, -16, 56, -25, -46, 56, -24 }, + { 1, 3, 23, 73, 8, 5, 34, 46, -12, 2, 35, -38, 26, 52, 2, -31 }, + { -3, -2, -21, -52, 1, -10, -17, 44, -19, -20, 30, 45, 27, 61, 49, 21 }, + { -2, -7, -33, -56, -4, -6, 21, 63, 15, 31, 32, -22, -10, -26, -52, -38 }, + { -5, -12, -18, -12, 8, 22, 38, 36, -5, -15, -51, -63, -5, 0, 15, 73 }, + }, + { + { -102, 22, 7, 2, 66, -25, -6, -1, -15, 14, 1, -1, 2, -2, 1, 0 }, + { 12, 93, -27, -6, -27, -64, 36, 6, 13, 5, -23, 0, -2, 6, 5, -3 }, + { -59, -24, 17, 1, -62, -2, -3, 2, 83, -12, -17, -2, -24, 14, 7, -2 }, + { -33, 23, -36, 11, -21, 50, 35, -16, -23, -78, 16, 19, 22, 15, -30, -5 }, + { 0, -38, -81, 30, 27, 5, 51, -32, 24, 36, -16, 12, -24, -8, 9, 1 }, + { 28, 38, 8, -9, 62, 32, -13, 2, 51, -32, 15, 5, -66, 28, 0, -1 }, + { 11, -35, 21, -17, 30, -18, 31, 18, -11, -36, -80, 12, 16, 49, 13, -32 }, + { -13, 23, 22, -36, -12, 64, 39, 25, -19, 23, -36, 9, -30, -58, 33, -7 }, + { -9, -20, -55, -83, 3, -2, 1, 62, 8, 2, 27, -28, 7, 15, -11, 5 }, + { -6, 24, -38, 23, -8, 40, -49, 0, -7, 9, -25, -44, 23, 39, 70, -3 }, + { 12, 17, 17, 0, 32, 27, 21, 2, 67, 11, -6, -10, 89, -22, -12, 16 }, + { 2, -9, 8, 45, 7, -8, 27, 35, -9, -31, -17, -87, -23, -22, -19, 44 }, + { -1, -9, 28, -24, -1, -10, 49, -30, -8, -7, 40, 1, 4, 33, 65, 67 }, + { 5, -12, -24, -17, 13, -34, -32, -16, 14, -67, -7, 9, 7, -74, 49, 1 }, + { 2, -6, 11, 45, 3, -10, 33, 55, 8, -5, 59, 4, 7, -4, 44, -66 }, + { -1, 1, -14, 36, -1, 2, -20, 69, 0, 0, -15, 72, 3, 4, 5, 65 }, + } + } +}; + +//-------------------------------------------------------------------------------------------------- diff --git 
a/source/Lib/CommonLib/RomTr.cpp b/source/Lib/CommonLib/RomTr.cpp index f7fcc1ffed168a922f71213e18aa076deb894d03..a60611a633be90961c6282a6fe04acfd24741ea6 100644 --- a/source/Lib/CommonLib/RomTr.cpp +++ b/source/Lib/CommonLib/RomTr.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/SEI.cpp b/source/Lib/CommonLib/SEI.cpp index 10fc50e50ac168e4c2a58d471bb201fd1eaa563c..d061c51fdb6759d12dffb6a53421b2097421718d 100644 --- a/source/Lib/CommonLib/SEI.cpp +++ b/source/Lib/CommonLib/SEI.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -96,35 +96,51 @@ void deleteSEIs (SEIMessages &seiList) seiList.clear(); } -void SEIBufferingPeriod::copyTo (SEIBufferingPeriod& target) +void SEIBufferingPeriod::copyTo (SEIBufferingPeriod& target) const { - target.m_bpSeqParameterSetId = m_bpSeqParameterSetId; - target.m_rapCpbParamsPresentFlag = m_rapCpbParamsPresentFlag; - target.m_cpbDelayOffset = m_cpbDelayOffset; - target.m_dpbDelayOffset = m_dpbDelayOffset; + target.m_bpNalCpbParamsPresentFlag = m_bpNalCpbParamsPresentFlag; + target.m_bpVclCpbParamsPresentFlag = m_bpVclCpbParamsPresentFlag; + target.m_initialCpbRemovalDelayLength = m_initialCpbRemovalDelayLength; + target.m_cpbRemovalDelayLength = m_cpbRemovalDelayLength; + target.m_dpbOutputDelayLength = m_dpbOutputDelayLength; + target.m_duCpbRemovalDelayIncrementLength = m_duCpbRemovalDelayIncrementLength; + target.m_dpbOutputDelayDuLength = m_dpbOutputDelayDuLength; target.m_concatenationFlag = 
m_concatenationFlag; target.m_auCpbRemovalDelayDelta = m_auCpbRemovalDelayDelta; + target.m_cpbRemovalDelayDeltasPresentFlag = m_cpbRemovalDelayDeltasPresentFlag; + target.m_numCpbRemovalDelayDeltas = m_numCpbRemovalDelayDeltas; + target.m_bpMaxSubLayers = m_bpMaxSubLayers; ::memcpy(target.m_initialCpbRemovalDelay, m_initialCpbRemovalDelay, sizeof(m_initialCpbRemovalDelay)); - ::memcpy(target.m_initialCpbRemovalDelayOffset, m_initialCpbRemovalDelayOffset, sizeof(m_initialCpbRemovalDelayOffset)); - ::memcpy(target.m_initialAltCpbRemovalDelay, m_initialAltCpbRemovalDelay, sizeof(m_initialAltCpbRemovalDelay)); - ::memcpy(target.m_initialAltCpbRemovalDelayOffset, m_initialAltCpbRemovalDelayOffset, sizeof(m_initialAltCpbRemovalDelayOffset)); + ::memcpy(target.m_initialCpbRemovalOffset, m_initialCpbRemovalOffset, sizeof(m_initialCpbRemovalOffset)); + ::memcpy(target.m_cpbRemovalDelayDelta, m_cpbRemovalDelayDelta, sizeof(m_cpbRemovalDelayDelta)); + target.m_bpCpbCnt = m_bpCpbCnt; + target.m_bpDecodingUnitHrdParamsPresentFlag = m_bpDecodingUnitHrdParamsPresentFlag; + target.m_decodingUnitCpbParamsInPicTimingSeiFlag = m_decodingUnitCpbParamsInPicTimingSeiFlag; + target.m_sublayerInitialCpbRemovalDelayPresentFlag = m_sublayerInitialCpbRemovalDelayPresentFlag; + target.m_concatenationFlag = m_concatenationFlag; + target.m_maxInitialRemovalDelayForConcatenation = m_maxInitialRemovalDelayForConcatenation; + target.m_altCpbParamsPresentFlag = m_altCpbParamsPresentFlag; } -void SEIPictureTiming::copyTo (SEIPictureTiming& target) +void SEIPictureTiming::copyTo (SEIPictureTiming& target) const { - target.m_picStruct = m_picStruct; - target.m_sourceScanType = m_sourceScanType; - target.m_duplicateFlag = m_duplicateFlag; - - target.m_auCpbRemovalDelay = m_auCpbRemovalDelay; + ::memcpy(target.m_auCpbRemovalDelay, m_auCpbRemovalDelay, sizeof(m_auCpbRemovalDelay)); + ::memcpy(target.m_ptSubLayerDelaysPresentFlag, m_ptSubLayerDelaysPresentFlag, sizeof(m_ptSubLayerDelaysPresentFlag)); + 
::memcpy(target.m_duCommonCpbRemovalDelayMinus1, m_duCommonCpbRemovalDelayMinus1, sizeof(m_duCommonCpbRemovalDelayMinus1)); + ::memcpy(target.m_cpbRemovalDelayDeltaEnabledFlag, m_cpbRemovalDelayDeltaEnabledFlag, sizeof(m_cpbRemovalDelayDeltaEnabledFlag)); + ::memcpy(target.m_cpbRemovalDelayDeltaIdx, m_cpbRemovalDelayDeltaIdx, sizeof(m_cpbRemovalDelayDeltaIdx)); target.m_picDpbOutputDelay = m_picDpbOutputDelay; target.m_picDpbOutputDuDelay = m_picDpbOutputDuDelay; target.m_numDecodingUnitsMinus1 = m_numDecodingUnitsMinus1; target.m_duCommonCpbRemovalDelayFlag = m_duCommonCpbRemovalDelayFlag; - target.m_duCommonCpbRemovalDelayMinus1 = m_duCommonCpbRemovalDelayMinus1; target.m_numNalusInDuMinus1 = m_numNalusInDuMinus1; target.m_duCpbRemovalDelayMinus1 = m_duCpbRemovalDelayMinus1; + target.m_cpbAltTimingInfoPresentFlag = m_cpbAltTimingInfoPresentFlag; + target.m_cpbAltInitialCpbRemovalDelayDelta = m_cpbAltInitialCpbRemovalDelayDelta; + target.m_cpbAltInitialCpbRemovalOffsetDelta = m_cpbAltInitialCpbRemovalOffsetDelta; + target.m_cpbDelayOffset = m_cpbDelayOffset; + target.m_dpbDelayOffset = m_dpbDelayOffset; } // Static member @@ -134,41 +150,34 @@ const char *SEI::getSEIMessageString(SEI::PayloadType payloadType) { case SEI::BUFFERING_PERIOD: return "Buffering period"; case SEI::PICTURE_TIMING: return "Picture timing"; +#if HEVC_SEI case SEI::PAN_SCAN_RECT: return "Pan-scan rectangle"; // not currently decoded +#endif case SEI::FILLER_PAYLOAD: return "Filler payload"; // not currently decoded case SEI::USER_DATA_REGISTERED_ITU_T_T35: return "User data registered"; // not currently decoded case SEI::USER_DATA_UNREGISTERED: return "User data unregistered"; - case SEI::RECOVERY_POINT: return "Recovery point"; - case SEI::SCENE_INFO: return "Scene information"; // not currently decoded - case SEI::FULL_FRAME_SNAPSHOT: return "Picture snapshot"; // not currently decoded - case SEI::PROGRESSIVE_REFINEMENT_SEGMENT_START: return "Progressive refinement segment start"; // not 
currently decoded - case SEI::PROGRESSIVE_REFINEMENT_SEGMENT_END: return "Progressive refinement segment end"; // not currently decoded case SEI::FILM_GRAIN_CHARACTERISTICS: return "Film grain characteristics"; // not currently decoded - case SEI::POST_FILTER_HINT: return "Post filter hint"; // not currently decoded - case SEI::TONE_MAPPING_INFO: return "Tone mapping information"; - case SEI::KNEE_FUNCTION_INFO: return "Knee function information"; case SEI::FRAME_PACKING: return "Frame packing arrangement"; - case SEI::DISPLAY_ORIENTATION: return "Display orientation"; - case SEI::GREEN_METADATA: return "Green metadata information"; - case SEI::SOP_DESCRIPTION: return "Structure of pictures information"; - case SEI::ACTIVE_PARAMETER_SETS: return "Active parameter sets"; case SEI::DECODING_UNIT_INFO: return "Decoding unit information"; +#if HEVC_SEI case SEI::TEMPORAL_LEVEL0_INDEX: return "Temporal sub-layer zero index"; +#endif case SEI::DECODED_PICTURE_HASH: return "Decoded picture hash"; - case SEI::SCALABLE_NESTING: return "Scalable nesting"; - case SEI::REGION_REFRESH_INFO: return "Region refresh information"; - case SEI::NO_DISPLAY: return "No display"; - case SEI::TIME_CODE: return "Time code"; + case SEI::DEPENDENT_RAP_INDICATION: return "Dependent RAP indication"; case SEI::MASTERING_DISPLAY_COLOUR_VOLUME: return "Mastering display colour volume"; - case SEI::SEGM_RECT_FRAME_PACKING: return "Segmented rectangular frame packing arrangement"; -#if HEVC_TILES_WPP - case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS: return "Temporal motion constrained tile sets"; -#endif - case SEI::CHROMA_RESAMPLING_FILTER_HINT: return "Chroma sampling filter hint"; - case SEI::COLOUR_REMAPPING_INFO: return "Colour remapping info"; #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: return "Alternative transfer characteristics"; #endif + case SEI::CONTENT_LIGHT_LEVEL_INFO: return "Content light level information"; + case 
SEI::AMBIENT_VIEWING_ENVIRONMENT: return "Ambient viewing environment"; + case SEI::CONTENT_COLOUR_VOLUME: return "Content colour volume"; + case SEI::EQUIRECTANGULAR_PROJECTION: return "Equirectangular projection"; + case SEI::SPHERE_ROTATION: return "Sphere rotation"; + case SEI::REGION_WISE_PACKING: return "Region wise packing information"; + case SEI::OMNI_VIEWPORT: return "Omni viewport"; + case SEI::GENERALIZED_CUBEMAP_PROJECTION: return "Generalized cubemap projection"; + case SEI::SAMPLE_ASPECT_RATIO_INFO: return "Sample aspect ratio information"; + case SEI::SUBPICTURE_LEVEL_INFO: return "Subpicture level information"; default: return "Unknown"; } } diff --git a/source/Lib/CommonLib/SEI.h b/source/Lib/CommonLib/SEI.h index c6ea74f82eb4bc67b37642992002c3c2fb6cecf4..a214052564b2c582c38f8d2fd461bce7b134ee78 100644 --- a/source/Lib/CommonLib/SEI.h +++ b/source/Lib/CommonLib/SEI.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -56,41 +56,33 @@ public: { BUFFERING_PERIOD = 0, PICTURE_TIMING = 1, +#if HEVC_SEI PAN_SCAN_RECT = 2, +#endif FILLER_PAYLOAD = 3, USER_DATA_REGISTERED_ITU_T_T35 = 4, USER_DATA_UNREGISTERED = 5, - RECOVERY_POINT = 6, - SCENE_INFO = 9, - FULL_FRAME_SNAPSHOT = 15, - PROGRESSIVE_REFINEMENT_SEGMENT_START = 16, - PROGRESSIVE_REFINEMENT_SEGMENT_END = 17, FILM_GRAIN_CHARACTERISTICS = 19, - POST_FILTER_HINT = 22, - TONE_MAPPING_INFO = 23, FRAME_PACKING = 45, - DISPLAY_ORIENTATION = 47, - GREEN_METADATA = 56, - SOP_DESCRIPTION = 128, - ACTIVE_PARAMETER_SETS = 129, DECODING_UNIT_INFO = 130, +#if HEVC_SEI TEMPORAL_LEVEL0_INDEX = 131, +#endif DECODED_PICTURE_HASH = 132, - SCALABLE_NESTING = 133, - REGION_REFRESH_INFO = 134, - NO_DISPLAY = 135, - TIME_CODE = 136, MASTERING_DISPLAY_COLOUR_VOLUME = 137, - SEGM_RECT_FRAME_PACKING = 138, -#if HEVC_TILES_WPP - TEMP_MOTION_CONSTRAINED_TILE_SETS = 139, -#endif - CHROMA_RESAMPLING_FILTER_HINT = 140, - KNEE_FUNCTION_INFO = 141, - COLOUR_REMAPPING_INFO = 142, -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI - ALTERNATIVE_TRANSFER_CHARACTERISTICS = 182, -#endif + DEPENDENT_RAP_INDICATION = 145, + EQUIRECTANGULAR_PROJECTION = 150, + SPHERE_ROTATION = 154, + REGION_WISE_PACKING = 155, + OMNI_VIEWPORT = 156, + GENERALIZED_CUBEMAP_PROJECTION = 153, + FRAME_FIELD_INFO = 168, + SUBPICTURE_LEVEL_INFO = 203, + SAMPLE_ASPECT_RATIO_INFO = 204, + CONTENT_LIGHT_LEVEL_INFO = 144, + ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147, + AMBIENT_VIEWING_ENVIRONMENT = 148, + CONTENT_COLOUR_VOLUME = 149, }; SEI() {} @@ -101,6 +93,130 @@ public: virtual PayloadType payloadType() const = 0; }; + +class SEIEquirectangularProjection : public SEI +{ +public: + PayloadType payloadType() const { return EQUIRECTANGULAR_PROJECTION; } + + SEIEquirectangularProjection() {} + virtual ~SEIEquirectangularProjection() {} + + bool m_erpCancelFlag; + bool m_erpPersistenceFlag; + bool m_erpGuardBandFlag; + 
uint8_t m_erpGuardBandType; + uint8_t m_erpLeftGuardBandWidth; + uint8_t m_erpRightGuardBandWidth; +}; + +class SEISphereRotation : public SEI +{ +public: + PayloadType payloadType() const { return SPHERE_ROTATION; } + + SEISphereRotation() {} + virtual ~SEISphereRotation() {} + + bool m_sphereRotationCancelFlag; + bool m_sphereRotationPersistenceFlag; + int m_sphereRotationYaw; + int m_sphereRotationPitch; + int m_sphereRotationRoll; +}; + +class SEIOmniViewport : public SEI +{ +public: + PayloadType payloadType() const { return OMNI_VIEWPORT; } + + SEIOmniViewport() {} + virtual ~SEIOmniViewport() {} + + struct OmniViewport + { + int azimuthCentre; + int elevationCentre; + int tiltCentre; + uint32_t horRange; + uint32_t verRange; + }; + + uint32_t m_omniViewportId; + bool m_omniViewportCancelFlag; + bool m_omniViewportPersistenceFlag; + uint8_t m_omniViewportCntMinus1; + std::vector<OmniViewport> m_omniViewportRegions; +}; + +class SEIRegionWisePacking : public SEI +{ +public: + PayloadType payloadType() const { return REGION_WISE_PACKING; } + SEIRegionWisePacking() {} + virtual ~SEIRegionWisePacking() {} + bool m_rwpCancelFlag; + bool m_rwpPersistenceFlag; + bool m_constituentPictureMatchingFlag; + int m_numPackedRegions; + int m_projPictureWidth; + int m_projPictureHeight; + int m_packedPictureWidth; + int m_packedPictureHeight; + std::vector<uint8_t> m_rwpTransformType; + std::vector<bool> m_rwpGuardBandFlag; + std::vector<uint32_t> m_projRegionWidth; + std::vector<uint32_t> m_projRegionHeight; + std::vector<uint32_t> m_rwpProjRegionTop; + std::vector<uint32_t> m_projRegionLeft; + std::vector<uint16_t> m_packedRegionWidth; + std::vector<uint16_t> m_packedRegionHeight; + std::vector<uint16_t> m_packedRegionTop; + std::vector<uint16_t> m_packedRegionLeft; + std::vector<uint8_t> m_rwpLeftGuardBandWidth; + std::vector<uint8_t> m_rwpRightGuardBandWidth; + std::vector<uint8_t> m_rwpTopGuardBandHeight; + std::vector<uint8_t> m_rwpBottomGuardBandHeight; + 
std::vector<bool> m_rwpGuardBandNotUsedForPredFlag; + std::vector<uint8_t> m_rwpGuardBandType; +}; + +class SEIGeneralizedCubemapProjection : public SEI +{ +public: + PayloadType payloadType() const { return GENERALIZED_CUBEMAP_PROJECTION; } + + SEIGeneralizedCubemapProjection() {} + virtual ~SEIGeneralizedCubemapProjection() {} + + bool m_gcmpCancelFlag; + bool m_gcmpPersistenceFlag; + uint8_t m_gcmpPackingType; + uint8_t m_gcmpMappingFunctionType; + std::vector<uint8_t> m_gcmpFaceIndex; + std::vector<uint8_t> m_gcmpFaceRotation; + std::vector<uint8_t> m_gcmpFunctionCoeffU; + std::vector<bool> m_gcmpFunctionUAffectedByVFlag; + std::vector<uint8_t> m_gcmpFunctionCoeffV; + std::vector<bool> m_gcmpFunctionVAffectedByUFlag; + bool m_gcmpGuardBandFlag; + bool m_gcmpGuardBandBoundaryType; + uint8_t m_gcmpGuardBandSamplesMinus1; +}; + +class SEISampleAspectRatioInfo : public SEI +{ +public: + PayloadType payloadType() const { return SAMPLE_ASPECT_RATIO_INFO; } + SEISampleAspectRatioInfo() {} + virtual ~SEISampleAspectRatioInfo() {} + bool m_sariCancelFlag; + bool m_sariPersistenceFlag; + int m_sariAspectRatioIdc; + int m_sariSarWidth; + int m_sariSarHeight; +}; + static const uint32_t ISO_IEC_11578_LEN=16; class SEIuserDataUnregistered : public SEI @@ -135,90 +251,138 @@ public: PictureHash m_pictureHash; }; +class SEIDependentRAPIndication : public SEI +{ +public: + PayloadType payloadType() const { return DEPENDENT_RAP_INDICATION; } + SEIDependentRAPIndication() { } + + virtual ~SEIDependentRAPIndication() { } +}; + +#if HEVC_SEI class SEIActiveParameterSets : public SEI { public: PayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; } SEIActiveParameterSets() -#if HEVC_VPS - : activeVPSId (0) - , m_selfContainedCvsFlag (false) -#else : m_selfContainedCvsFlag(false) -#endif , m_noParameterSetUpdateFlag (false) , numSpsIdsMinus1 (0) {} virtual ~SEIActiveParameterSets() {} -#if HEVC_VPS - int activeVPSId; -#endif bool m_selfContainedCvsFlag; bool 
m_noParameterSetUpdateFlag; int numSpsIdsMinus1; std::vector<int> activeSeqParameterSetId; }; +#endif class SEIBufferingPeriod : public SEI { public: PayloadType payloadType() const { return BUFFERING_PERIOD; } - void copyTo (SEIBufferingPeriod& target); + void copyTo (SEIBufferingPeriod& target) const; SEIBufferingPeriod() - : m_bpSeqParameterSetId (0) - , m_rapCpbParamsPresentFlag (false) - , m_cpbDelayOffset (0) - , m_dpbDelayOffset (0) + : m_bpNalCpbParamsPresentFlag (false) + , m_bpVclCpbParamsPresentFlag (false) + , m_initialCpbRemovalDelayLength (0) + , m_cpbRemovalDelayLength (0) + , m_dpbOutputDelayLength (0) + , m_bpCpbCnt(0) + , m_duCpbRemovalDelayIncrementLength (0) + , m_dpbOutputDelayDuLength (0) + , m_cpbRemovalDelayDeltasPresentFlag (false) + , m_numCpbRemovalDelayDeltas (0) + , m_bpMaxSubLayers (0) + , m_bpDecodingUnitHrdParamsPresentFlag (false) + , m_decodingUnitCpbParamsInPicTimingSeiFlag (false) + , m_sublayerInitialCpbRemovalDelayPresentFlag(false) + , m_additionalConcatenationInfoPresentFlag (false) + , m_maxInitialRemovalDelayForConcatenation (0) + , m_altCpbParamsPresentFlag (false) + , m_useAltCpbParamsFlag (false) { ::memset(m_initialCpbRemovalDelay, 0, sizeof(m_initialCpbRemovalDelay)); - ::memset(m_initialCpbRemovalDelayOffset, 0, sizeof(m_initialCpbRemovalDelayOffset)); - ::memset(m_initialAltCpbRemovalDelay, 0, sizeof(m_initialAltCpbRemovalDelay)); - ::memset(m_initialAltCpbRemovalDelayOffset, 0, sizeof(m_initialAltCpbRemovalDelayOffset)); + ::memset(m_initialCpbRemovalOffset, 0, sizeof(m_initialCpbRemovalOffset)); + ::memset(m_cpbRemovalDelayDelta, 0, sizeof(m_cpbRemovalDelayDelta)); } virtual ~SEIBufferingPeriod() {} - uint32_t m_bpSeqParameterSetId; - bool m_rapCpbParamsPresentFlag; - uint32_t m_cpbDelayOffset; - uint32_t m_dpbDelayOffset; - uint32_t m_initialCpbRemovalDelay [MAX_CPB_CNT][2]; - uint32_t m_initialCpbRemovalDelayOffset [MAX_CPB_CNT][2]; - uint32_t m_initialAltCpbRemovalDelay [MAX_CPB_CNT][2]; - uint32_t 
m_initialAltCpbRemovalDelayOffset[MAX_CPB_CNT][2]; + void setDuCpbRemovalDelayIncrementLength( uint32_t value ) { m_duCpbRemovalDelayIncrementLength = value; } + uint32_t getDuCpbRemovalDelayIncrementLength( ) const { return m_duCpbRemovalDelayIncrementLength; } + void setDpbOutputDelayDuLength( uint32_t value ) { m_dpbOutputDelayDuLength = value; } + uint32_t getDpbOutputDelayDuLength( ) const { return m_dpbOutputDelayDuLength; } + bool m_bpNalCpbParamsPresentFlag; + bool m_bpVclCpbParamsPresentFlag; + uint32_t m_initialCpbRemovalDelayLength; + uint32_t m_cpbRemovalDelayLength; + uint32_t m_dpbOutputDelayLength; + int m_bpCpbCnt; + uint32_t m_duCpbRemovalDelayIncrementLength; + uint32_t m_dpbOutputDelayDuLength; + uint32_t m_initialCpbRemovalDelay [MAX_TLAYER][MAX_CPB_CNT][2]; + uint32_t m_initialCpbRemovalOffset [MAX_TLAYER][MAX_CPB_CNT][2]; bool m_concatenationFlag; uint32_t m_auCpbRemovalDelayDelta; + bool m_cpbRemovalDelayDeltasPresentFlag; + int m_numCpbRemovalDelayDeltas; + int m_bpMaxSubLayers; + uint32_t m_cpbRemovalDelayDelta [15]; + bool m_bpDecodingUnitHrdParamsPresentFlag; + bool m_decodingUnitCpbParamsInPicTimingSeiFlag; + bool m_sublayerInitialCpbRemovalDelayPresentFlag; + bool m_additionalConcatenationInfoPresentFlag; + uint32_t m_maxInitialRemovalDelayForConcatenation; + bool m_altCpbParamsPresentFlag; + bool m_useAltCpbParamsFlag; }; + class SEIPictureTiming : public SEI { public: PayloadType payloadType() const { return PICTURE_TIMING; } - void copyTo (SEIPictureTiming& target); + void copyTo (SEIPictureTiming& target) const; SEIPictureTiming() - : m_picStruct (0) - , m_sourceScanType (0) - , m_duplicateFlag (false) - , m_picDpbOutputDuDelay (0) - {} + : m_picDpbOutputDelay (0) + , m_picDpbOutputDuDelay (0) + , m_numDecodingUnitsMinus1 (0) + , m_duCommonCpbRemovalDelayFlag (false) + , m_cpbAltTimingInfoPresentFlag (false) + , m_cpbDelayOffset (0) + , m_dpbDelayOffset (0) + { + ::memset(m_ptSubLayerDelaysPresentFlag, 0, 
sizeof(m_ptSubLayerDelaysPresentFlag)); + ::memset(m_duCommonCpbRemovalDelayMinus1, 0, sizeof(m_duCommonCpbRemovalDelayMinus1)); + ::memset(m_cpbRemovalDelayDeltaEnabledFlag, 0, sizeof(m_cpbRemovalDelayDeltaEnabledFlag)); + ::memset(m_cpbRemovalDelayDeltaIdx, 0, sizeof(m_cpbRemovalDelayDeltaIdx)); + ::memset(m_auCpbRemovalDelay, 0, sizeof(m_auCpbRemovalDelay)); + } virtual ~SEIPictureTiming() { } - uint32_t m_picStruct; - uint32_t m_sourceScanType; - bool m_duplicateFlag; - uint32_t m_auCpbRemovalDelay; + bool m_ptSubLayerDelaysPresentFlag[MAX_TLAYER]; + bool m_cpbRemovalDelayDeltaEnabledFlag[MAX_TLAYER]; + uint32_t m_cpbRemovalDelayDeltaIdx[MAX_TLAYER]; + uint32_t m_auCpbRemovalDelay[MAX_TLAYER]; uint32_t m_picDpbOutputDelay; uint32_t m_picDpbOutputDuDelay; uint32_t m_numDecodingUnitsMinus1; bool m_duCommonCpbRemovalDelayFlag; - uint32_t m_duCommonCpbRemovalDelayMinus1; + uint32_t m_duCommonCpbRemovalDelayMinus1[MAX_TLAYER]; std::vector<uint32_t> m_numNalusInDuMinus1; std::vector<uint32_t> m_duCpbRemovalDelayMinus1; + bool m_cpbAltTimingInfoPresentFlag; + std::vector<uint32_t> m_cpbAltInitialCpbRemovalDelayDelta; + std::vector<uint32_t> m_cpbAltInitialCpbRemovalOffsetDelta; + uint32_t m_cpbDelayOffset; + uint32_t m_dpbDelayOffset; }; class SEIDecodingUnitInfo : public SEI @@ -228,30 +392,51 @@ public: SEIDecodingUnitInfo() : m_decodingUnitIdx(0) - , m_duSptCpbRemovalDelay(0) , m_dpbOutputDuDelayPresentFlag(false) , m_picSptDpbOutputDuDelay(0) - {} + { + ::memset(m_duiSubLayerDelaysPresentFlag, 0, sizeof(m_duiSubLayerDelaysPresentFlag)); + ::memset(m_duSptCpbRemovalDelayIncrement, 0, sizeof(m_duSptCpbRemovalDelayIncrement)); + } virtual ~SEIDecodingUnitInfo() {} int m_decodingUnitIdx; - int m_duSptCpbRemovalDelay; + bool m_duiSubLayerDelaysPresentFlag[MAX_TLAYER]; + int m_duSptCpbRemovalDelayIncrement[MAX_TLAYER]; bool m_dpbOutputDuDelayPresentFlag; int m_picSptDpbOutputDuDelay; }; -class SEIRecoveryPoint : public SEI + +class SEIFrameFieldInfo : public SEI { 
public: - PayloadType payloadType() const { return RECOVERY_POINT; } - - SEIRecoveryPoint() {} - virtual ~SEIRecoveryPoint() {} - - int m_recoveryPocCnt; - bool m_exactMatchingFlag; - bool m_brokenLinkFlag; + PayloadType payloadType() const { return FRAME_FIELD_INFO; } + + SEIFrameFieldInfo() + : m_fieldPicFlag(false) + , m_bottomFieldFlag (false) + , m_pairingIndicatedFlag (false) + , m_pairedWithNextFieldFlag(false) + , m_displayFieldsFromFrameFlag(false) + , m_topFieldFirstFlag(false) + , m_displayElementalPeriodsMinus1(0) + , m_sourceScanType(0) + , m_duplicateFlag(false) + {} + virtual ~SEIFrameFieldInfo() {} + + bool m_fieldPicFlag; + bool m_bottomFieldFlag; + bool m_pairingIndicatedFlag; + bool m_pairedWithNextFieldFlag; + bool m_displayFieldsFromFrameFlag; + bool m_topFieldFirstFlag; + int m_displayElementalPeriodsMinus1; + int m_sourceScanType; + bool m_duplicateFlag; }; + class SEIFramePacking : public SEI { public: @@ -280,210 +465,6 @@ public: bool m_upsampledAspectRatio; }; -class SEISegmentedRectFramePacking : public SEI -{ -public: - PayloadType payloadType() const { return SEGM_RECT_FRAME_PACKING; } - - SEISegmentedRectFramePacking() {} - virtual ~SEISegmentedRectFramePacking() {} - - bool m_arrangementCancelFlag; - int m_contentInterpretationType; - bool m_arrangementPersistenceFlag; -}; - -class SEIDisplayOrientation : public SEI -{ -public: - PayloadType payloadType() const { return DISPLAY_ORIENTATION; } - - SEIDisplayOrientation() - : cancelFlag(true) - , persistenceFlag(0) - , extensionFlag(false) - {} - virtual ~SEIDisplayOrientation() {} - - bool cancelFlag; - bool horFlip; - bool verFlip; - - uint32_t anticlockwiseRotation; - bool persistenceFlag; - bool extensionFlag; -}; - -class SEITemporalLevel0Index : public SEI -{ -public: - PayloadType payloadType() const { return TEMPORAL_LEVEL0_INDEX; } - - SEITemporalLevel0Index() - : tl0Idx(0) - , rapIdx(0) - {} - virtual ~SEITemporalLevel0Index() {} - - uint32_t tl0Idx; - uint32_t rapIdx; -}; - 
-class SEIGradualDecodingRefreshInfo : public SEI -{ -public: - PayloadType payloadType() const { return REGION_REFRESH_INFO; } - - SEIGradualDecodingRefreshInfo() - : m_gdrForegroundFlag(0) - {} - virtual ~SEIGradualDecodingRefreshInfo() {} - - bool m_gdrForegroundFlag; -}; - -class SEINoDisplay : public SEI -{ -public: - PayloadType payloadType() const { return NO_DISPLAY; } - - SEINoDisplay() - : m_noDisplay(false) - {} - virtual ~SEINoDisplay() {} - - bool m_noDisplay; -}; - -class SEISOPDescription : public SEI -{ -public: - PayloadType payloadType() const { return SOP_DESCRIPTION; } - - SEISOPDescription() {} - virtual ~SEISOPDescription() {} - - uint32_t m_sopSeqParameterSetId; - uint32_t m_numPicsInSopMinus1; - - uint32_t m_sopDescVclNaluType[MAX_NUM_PICS_IN_SOP]; - uint32_t m_sopDescTemporalId[MAX_NUM_PICS_IN_SOP]; - uint32_t m_sopDescStRpsIdx[MAX_NUM_PICS_IN_SOP]; - int m_sopDescPocDelta[MAX_NUM_PICS_IN_SOP]; -}; - -class SEIToneMappingInfo : public SEI -{ -public: - PayloadType payloadType() const { return TONE_MAPPING_INFO; } - SEIToneMappingInfo() {} - virtual ~SEIToneMappingInfo() {} - - int m_toneMapId; - bool m_toneMapCancelFlag; - bool m_toneMapPersistenceFlag; - int m_codedDataBitDepth; - int m_targetBitDepth; - int m_modelId; - int m_minValue; - int m_maxValue; - int m_sigmoidMidpoint; - int m_sigmoidWidth; - std::vector<int> m_startOfCodedInterval; - int m_numPivots; - std::vector<int> m_codedPivotValue; - std::vector<int> m_targetPivotValue; - int m_cameraIsoSpeedIdc; - int m_cameraIsoSpeedValue; - int m_exposureIndexIdc; - int m_exposureIndexValue; - bool m_exposureCompensationValueSignFlag; - int m_exposureCompensationValueNumerator; - int m_exposureCompensationValueDenomIdc; - int m_refScreenLuminanceWhite; - int m_extendedRangeWhiteLevel; - int m_nominalBlackLevelLumaCodeValue; - int m_nominalWhiteLevelLumaCodeValue; - int m_extendedWhiteLevelLumaCodeValue; -}; - -class SEIKneeFunctionInfo : public SEI -{ -public: - PayloadType 
payloadType() const { return KNEE_FUNCTION_INFO; } - SEIKneeFunctionInfo() {} - virtual ~SEIKneeFunctionInfo() {} - - int m_kneeId; - bool m_kneeCancelFlag; - bool m_kneePersistenceFlag; - int m_kneeInputDrange; - int m_kneeInputDispLuminance; - int m_kneeOutputDrange; - int m_kneeOutputDispLuminance; - int m_kneeNumKneePointsMinus1; - std::vector<int> m_kneeInputKneePoint; - std::vector<int> m_kneeOutputKneePoint; -}; - -class SEIColourRemappingInfo : public SEI -{ -public: - - struct CRIlut - { - int codedValue; - int targetValue; - bool operator < (const CRIlut& a) const - { - return codedValue < a.codedValue; - } - }; - - PayloadType payloadType() const { return COLOUR_REMAPPING_INFO; } - SEIColourRemappingInfo() {} - ~SEIColourRemappingInfo() {} - - void copyFrom( const SEIColourRemappingInfo &seiCriInput) - { - (*this) = seiCriInput; - } - - uint32_t m_colourRemapId; - bool m_colourRemapCancelFlag; - bool m_colourRemapPersistenceFlag; - bool m_colourRemapVideoSignalInfoPresentFlag; - bool m_colourRemapFullRangeFlag; - int m_colourRemapPrimaries; - int m_colourRemapTransferFunction; - int m_colourRemapMatrixCoefficients; - int m_colourRemapInputBitDepth; - int m_colourRemapBitDepth; - int m_preLutNumValMinus1[3]; - std::vector<CRIlut> m_preLut[3]; - bool m_colourRemapMatrixPresentFlag; - int m_log2MatrixDenom; - int m_colourRemapCoeffs[3][3]; - int m_postLutNumValMinus1[3]; - std::vector<CRIlut> m_postLut[3]; -}; - -class SEIChromaResamplingFilterHint : public SEI -{ -public: - PayloadType payloadType() const {return CHROMA_RESAMPLING_FILTER_HINT;} - SEIChromaResamplingFilterHint() {} - virtual ~SEIChromaResamplingFilterHint() {} - - int m_verChromaFilterIdc; - int m_horChromaFilterIdc; - bool m_verFilteringFieldProcessingFlag; - int m_targetFormatIdc; - bool m_perfectReconstructionFlag; - std::vector<std::vector<int> > m_verFilterCoeff; - std::vector<std::vector<int> > m_horFilterCoeff; -}; - class SEIMasteringDisplayColourVolume : public SEI { public: @@ 
-505,6 +486,7 @@ SEIMessages extractSeisByType(SEIMessages &seiList, SEI::PayloadType seiType); /// delete list of SEI messages (freeing the referenced objects) void deleteSEIs (SEIMessages &seiList); +#if HEVC_SEI class SEIScalableNesting : public SEI { public: @@ -543,7 +525,6 @@ public: SEITimeSet timeSetArray[MAX_TIMECODE_SEI_SETS]; }; -#if HEVC_TILES_WPP //definition according to P1005_v1; class SEITempMotionConstrainedTileSets: public SEI { @@ -607,7 +588,7 @@ void xTraceSEIHeader(); void xTraceSEIMessageType( SEI::PayloadType payloadType ); #endif -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI class SEIAlternativeTransferCharacteristics : public SEI { public: @@ -621,20 +602,125 @@ public: uint32_t m_preferredTransferCharacteristics; }; #endif +class SEIUserDataRegistered : public SEI +{ +public: + PayloadType payloadType() const { return USER_DATA_REGISTERED_ITU_T_T35; } + + SEIUserDataRegistered() {} + virtual ~SEIUserDataRegistered() {} + + uint16_t m_ituCountryCode; + std::vector<uint8_t> m_userData; +}; + +class SEIFilmGrainCharacteristics : public SEI +{ +public: + PayloadType payloadType() const { return FILM_GRAIN_CHARACTERISTICS; } + + SEIFilmGrainCharacteristics() {} + virtual ~SEIFilmGrainCharacteristics() {} + + bool m_filmGrainCharacteristicsCancelFlag; + uint8_t m_filmGrainModelId; + bool m_separateColourDescriptionPresentFlag; + uint8_t m_filmGrainBitDepthLumaMinus8; + uint8_t m_filmGrainBitDepthChromaMinus8; + bool m_filmGrainFullRangeFlag; + uint8_t m_filmGrainColourPrimaries; + uint8_t m_filmGrainTransferCharacteristics; + uint8_t m_filmGrainMatrixCoeffs; + uint8_t m_blendingModeId; + uint8_t m_log2ScaleFactor; + + struct CompModelIntensityValues + { + uint8_t intensityIntervalLowerBound; + uint8_t intensityIntervalUpperBound; + std::vector<int> compModelValue; + }; + + struct CompModel + { + bool presentFlag; + uint8_t numModelValues; + std::vector<CompModelIntensityValues> 
intensityValues; + }; + + CompModel m_compModel[MAX_NUM_COMPONENT]; + bool m_filmGrainCharacteristicsPersistenceFlag; +}; + +class SEIContentLightLevelInfo : public SEI +{ +public: + PayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; } + SEIContentLightLevelInfo() { } + + virtual ~SEIContentLightLevelInfo() { } -class SEIGreenMetadataInfo : public SEI + uint32_t m_maxContentLightLevel; + uint32_t m_maxPicAverageLightLevel; +}; + +class SEIAmbientViewingEnvironment : public SEI { public: - PayloadType payloadType() const { return GREEN_METADATA; } - SEIGreenMetadataInfo() {} + PayloadType payloadType() const { return AMBIENT_VIEWING_ENVIRONMENT; } + SEIAmbientViewingEnvironment() { } - virtual ~SEIGreenMetadataInfo() {} + virtual ~SEIAmbientViewingEnvironment() { } - uint32_t m_greenMetadataType; - uint32_t m_xsdMetricType; - uint32_t m_xsdMetricValue; + uint32_t m_ambientIlluminance; + uint16_t m_ambientLightX; + uint16_t m_ambientLightY; +}; + +class SEIContentColourVolume : public SEI +{ +public: + PayloadType payloadType() const { return CONTENT_COLOUR_VOLUME; } + SEIContentColourVolume() {} + virtual ~SEIContentColourVolume() {} + + bool m_ccvCancelFlag; + bool m_ccvPersistenceFlag; + bool m_ccvPrimariesPresentFlag; + bool m_ccvMinLuminanceValuePresentFlag; + bool m_ccvMaxLuminanceValuePresentFlag; + bool m_ccvAvgLuminanceValuePresentFlag; + int m_ccvPrimariesX[MAX_NUM_COMPONENT]; + int m_ccvPrimariesY[MAX_NUM_COMPONENT]; + uint32_t m_ccvMinLuminanceValue; + uint32_t m_ccvMaxLuminanceValue; + uint32_t m_ccvAvgLuminanceValue; }; #endif + +class SEISubpicureLevelInfo : public SEI +{ +public: + PayloadType payloadType() const { return SUBPICTURE_LEVEL_INFO; } + SEISubpicureLevelInfo() + : m_sliSeqParameterSetId(0) + , m_numRefLevels(0) + , m_explicitFractionPresentFlag (false) + {} + virtual ~SEISubpicureLevelInfo() {} + + int m_sliSeqParameterSetId; + int m_numRefLevels; + bool m_explicitFractionPresentFlag; + std::vector<Level::Name> 
m_refLevelIdc; + std::vector<std::vector<int>> m_refLevelFraction; +}; + + + + //! \} + + diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp index 7180629c0ae50656bd64a49f2a1633295a61a223..0953b569a4650468f729db30dff7a300d8d78aad 100644 --- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp +++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -112,6 +112,7 @@ const SAOBlkParam& SAOBlkParam::operator= (const SAOBlkParam& src) SampleAdaptiveOffset::SampleAdaptiveOffset() { + m_numberOfComponents = 0; } @@ -291,7 +292,9 @@ void SampleAdaptiveOffset::xReconstructBlkSAOParams(CodingStructure& cs, SAOBlkP void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& clpRng, int typeIdx, int* offset , const Pel* srcBlk, Pel* resBlk, int srcStride, int resStride, int width, int height - , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail) + , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail + , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry + ) { int x,y, startX, startY, endX, endY, edgeType; int firstLineStartX, firstLineEndX, lastLineStartX, lastLineEndX; @@ -313,6 +316,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& for (x=startX; x< endX; x++) { signRight = (int8_t)sgn(srcLine[x] - srcLine[x+1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, 
numVerVirBndry, 0, verVirBndryPos, horVirBndryPos)) + { + signLeft = -signRight; + continue; + } edgeType = signRight + signLeft; signLeft = -signRight; @@ -351,6 +359,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& for (x=0; x< width; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + signUpLine[x] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; signUpLine[x]= -signDown; @@ -386,6 +399,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& firstLineEndX = isAboveAvail? endX: 1; for(x= firstLineStartX; x< firstLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineAbove[x- 1]) - signUpLine[x+1]; resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng); @@ -402,6 +419,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& for (x=startX; x<endX; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x+ 1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + signDownLine[x + 1] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng); @@ -423,6 +445,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& lastLineEndX = isBelowRightAvail ? 
width : (width -1); for(x= lastLineStartX; x< lastLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, height - 1, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineBelow[x+ 1]) + signUpLine[x]; resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng); @@ -451,6 +477,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& firstLineEndX = isAboveRightAvail ? width : (width-1); for(x= firstLineStartX; x< firstLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) -signUpLine[x-1]; resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng); } @@ -465,6 +495,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& for(x= startX; x< endX; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x-1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + signUpLine[x - 1] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng); signUpLine[x-1] = -signDown; @@ -480,6 +515,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& lastLineEndX = isBelowAvail ? 
endX : 1; for(x= lastLineStartX; x< lastLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, height - 1, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineBelow[x-1]) + signUpLine[x]; resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng); @@ -535,6 +574,12 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s m_signLineBuf2.resize(lineBufferSize); } + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { -1,-1,-1 }; + int verVirBndryPos[] = { -1,-1,-1 }; + int horVirBndryPosComp[] = { -1,-1,-1 }; + int verVirBndryPosComp[] = { -1,-1,-1 }; + bool isCtuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries(area.Y().x, area.Y().y, area.Y().width, area.Y().height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cs.picHeader ); for(int compIdx = 0; compIdx < numberOfComponents; compIdx++) { const ComponentID compID = ComponentID(compIdx); @@ -547,6 +592,14 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s const Pel* srcBlk = src.get(compID).bufAt(compArea); int resStride = res.get(compID).stride; Pel* resBlk = res.get(compID).bufAt(compArea); + for (int i = 0; i < numHorVirBndry; i++) + { + horVirBndryPosComp[i] = (horVirBndryPos[i] >> ::getComponentScaleY(compID, area.chromaFormat)) - compArea.y; + } + for (int i = 0; i < numVerVirBndry; i++) + { + verVirBndryPosComp[i] = (verVirBndryPos[i] >> ::getComponentScaleX(compID, area.chromaFormat)) - compArea.x; + } offsetBlock( cs.sps->getBitDepth(toChannelType(compID)), cs.slice->clpRng(compID), @@ -556,6 +609,7 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s , isAboveAvail, isBelowAvail , isAboveLeftAvail, isAboveRightAvail , isBelowLeftAvail, isBelowRightAvail + , isCtuCrossedByVirtualBoundaries, horVirBndryPosComp, verVirBndryPosComp, numHorVirBndry, numVerVirBndry ); } } //compIdx 
@@ -608,101 +662,8 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP DTRACE ( g_trace_ctx, D_CRC, "SAO" ); DTRACE_CRC( g_trace_ctx, D_CRC, cs, cs.getRecoBuf() ); - xPCMLFDisableProcess(cs); -} - -void SampleAdaptiveOffset::xPCMLFDisableProcess(CodingStructure& cs) -{ - const PreCalcValues& pcv = *cs.pcv; - const bool bPCMFilter = (cs.sps->getPCMEnabledFlag() && cs.sps->getPCMFilterDisableFlag()) ? true : false; - - if( bPCMFilter || cs.pps->getTransquantBypassEnabledFlag() ) - { - for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight ) - { - for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth ) - { - UnitArea ctuArea( cs.area.chromaFormat, Area( xPos, yPos, pcv.maxCUWidth, pcv.maxCUHeight ) ); - - // CU-based deblocking - xPCMCURestoration(cs, ctuArea); - } - } - } } -void SampleAdaptiveOffset::xPCMCURestoration(CodingStructure& cs, const UnitArea &ctuArea) -{ - const SPS& sps = *cs.sps; - uint32_t numComponents = CS::isDualITree(cs) ? 
1 : m_numberOfComponents; - for( auto &cu : cs.traverseCUs( ctuArea, CH_L ) ) - { - // restore PCM samples - if( ( cu.ipcm && sps.getPCMFilterDisableFlag() ) || CU::isLosslessCoded( cu ) ) - { - - for( uint32_t comp = 0; comp < numComponents; comp++ ) - { - xPCMSampleRestoration( cu, ComponentID( comp ) ); - } - } - } - numComponents = m_numberOfComponents; - if (CS::isDualITree(cs) && numComponents) - { - for (auto &cu : cs.traverseCUs(ctuArea, CH_C)) - { - // restore PCM samples - if ((cu.ipcm && sps.getPCMFilterDisableFlag()) || CU::isLosslessCoded(cu)) - { - for (uint32_t comp = 1; comp < numComponents; comp++) - { - xPCMSampleRestoration(cu, ComponentID(comp)); - } - } - } - } -} - -void SampleAdaptiveOffset::xPCMSampleRestoration(CodingUnit& cu, const ComponentID compID) -{ - const CompArea& ca = cu.block(compID); - - if( CU::isLosslessCoded( cu ) && !cu.ipcm ) - { - for( auto &currTU : CU::traverseTUs( cu ) ) - { - const CPelBuf& pcmBuf = currTU.getPcmbuf( compID ); - PelBuf dstBuf = cu.cs->getRecoBuf( currTU.block(compID) ); - - dstBuf.copyFrom( pcmBuf ); - if (cu.slice->getReshapeInfo().getUseSliceReshaper() && isLuma(compID)) - { - dstBuf.rspSignal(m_pcReshape->getInvLUT()); - } - } - - return; - } - - const TransformUnit& tu = *cu.firstTU; CHECK( cu.firstTU != cu.lastTU, "Multiple TUs present in a PCM CU" ); - const CPelBuf& pcmBuf = tu.getPcmbuf( compID ); - PelBuf dstBuf = cu.cs->getRecoBuf( ca ); - const SPS &sps = *cu.cs->sps; - const uint32_t uiPcmLeftShiftBit = sps.getBitDepth(toChannelType(compID)) - sps.getPCMBitDepth(toChannelType(compID)); - - for (uint32_t y = 0; y < ca.height; y++) - { - for (uint32_t x = 0; x < ca.width; x++) - { - dstBuf.at(x,y) = (pcmBuf.at(x,y) << uiPcmLeftShiftBit); - } - } - if (cu.slice->getReshapeInfo().getUseSliceReshaper() && isLuma(compID)) - { - dstBuf.rspSignal(m_pcReshape->getInvLUT()); - } -} void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos, bool& 
isLeftAvail, @@ -728,43 +689,30 @@ void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& const CodingUnit* cuBelowRight = cs.getCU(pos.offset(width, height), CH_L); // check cross slice flags + const bool isLoopFilterAcrossSlicePPS = cs.pps->getLoopFilterAcrossSlicesEnabledFlag(); + if (!isLoopFilterAcrossSlicePPS) { - //left - isLeftAvail = (cuLeft != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuLeft) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //above - isAboveAvail = (cuAbove != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuAbove) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //right - isRightAvail = (cuRight != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuRight) ? cuRight->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //below - isBelowAvail = (cuBelow != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuBelow) ? cuBelow->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //above-left - isAboveLeftAvail = (cuAboveLeft != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuAboveLeft) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //below-right - isBelowRightAvail = (cuBelowRight != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuBelowRight) ? cuBelowRight->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - - //above-right - isAboveRightAvail = false; - if (cuAboveRight != NULL) - { - const bool bLFCrossSliceBoundaryFlag = (cuCurr->slice->getSliceCurStartCtuTsAddr() > cuAboveRight->slice->getSliceCurStartCtuTsAddr()) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : cuAboveRight->slice->getLFCrossSliceBoundaryFlag(); - isAboveRightAvail = ( !CU::isSameSlice(*cuCurr, *cuAboveRight) ) ? bLFCrossSliceBoundaryFlag : true; - } - - //below-left - isBelowLeftAvail = false; - if (cuBelowLeft != NULL) - { - const bool bLFCrossSliceBoundaryFlag = (cuCurr->slice->getSliceCurStartCtuTsAddr() > cuBelowLeft->slice->getSliceCurStartCtuTsAddr()) ? 
cuCurr->slice->getLFCrossSliceBoundaryFlag() : cuBelowLeft->slice->getLFCrossSliceBoundaryFlag(); - isBelowLeftAvail = ( !CU::isSameSlice(*cuCurr, *cuBelowLeft) ) ? bLFCrossSliceBoundaryFlag : true; - } + isLeftAvail = (cuLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuLeft); + isAboveAvail = (cuAbove == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAbove); + isRightAvail = (cuRight == NULL) ? false : CU::isSameSlice(*cuCurr, *cuRight); + isBelowAvail = (cuBelow == NULL) ? false : CU::isSameSlice(*cuCurr, *cuBelow); + isAboveLeftAvail = (cuAboveLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAboveLeft); + isAboveRightAvail = (cuAboveRight == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAboveRight); + isBelowLeftAvail = (cuBelowLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuBelowLeft); + isBelowRightAvail = (cuBelowRight == NULL) ? false : CU::isSameSlice(*cuCurr, *cuBelowRight); + } + else + { + isLeftAvail = (cuLeft != NULL); + isAboveAvail = (cuAbove != NULL); + isRightAvail = (cuRight != NULL); + isBelowAvail = (cuBelow != NULL); + isAboveLeftAvail = (cuAboveLeft != NULL); + isAboveRightAvail = (cuAboveRight != NULL); + isBelowLeftAvail = (cuBelowLeft != NULL); + isBelowRightAvail = (cuBelowRight != NULL); } -#if HEVC_TILES_WPP // check cross tile flags const bool isLoopFilterAcrossTilePPS = cs.pps->getLoopFilterAcrossTilesEnabledFlag(); if (!isLoopFilterAcrossTilePPS) @@ -778,7 +726,28 @@ void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& isBelowLeftAvail = (!isBelowLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuBelowLeft); isBelowRightAvail = (!isBelowRightAvail) ? 
false : CU::isSameTile(*cuCurr, *cuBelowRight); } -#endif } +bool SampleAdaptiveOffset::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader ) +{ + numHorVirBndry = 0; numVerVirBndry = 0; + if (picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag()) + { + for (int i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++) + { + if (yPos <= picHeader->getVirtualBoundariesPosY(i) && picHeader->getVirtualBoundariesPosY(i) <= yPos + height) + { + horVirBndryPos[numHorVirBndry++] = picHeader->getVirtualBoundariesPosY(i); + } + } + for (int i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++) + { + if (xPos <= picHeader->getVirtualBoundariesPosX(i) && picHeader->getVirtualBoundariesPosX(i) <= xPos + width) + { + verVirBndryPos[numVerVirBndry++] = picHeader->getVirtualBoundariesPosX(i); + } + } + } + return numHorVirBndry > 0 || numVerVirBndry > 0 ; +} //! \} diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.h b/source/Lib/CommonLib/SampleAdaptiveOffset.h index d3141d56ed63f1141f1eb9d08b7883691ef2364e..b8b47d48f7bb27590bb75e2ed1a8b1a7a0d84cbd 100644 --- a/source/Lib/CommonLib/SampleAdaptiveOffset.h +++ b/source/Lib/CommonLib/SampleAdaptiveOffset.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -84,15 +84,36 @@ protected: ) const; void offsetBlock(const int channelBitDepth, const ClpRng& clpRng, int typeIdx, int* offset, const Pel* srcBlk, Pel* resBlk, int srcStride, int resStride, int width, int height - , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail); + , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail + , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry + ); void invertQuantOffsets(ComponentID compIdx, int typeIdc, int typeAuxInfo, int* dstOffsets, int* srcOffsets); void reconstructBlkSAOParam(SAOBlkParam& recParam, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES]); int getMergeList(CodingStructure& cs, int ctuRsAddr, SAOBlkParam* blkParams, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES]); void offsetCTU(const UnitArea& area, const CPelUnitBuf& src, PelUnitBuf& res, SAOBlkParam& saoblkParam, CodingStructure& cs); - void xPCMLFDisableProcess(CodingStructure& cs); - void xPCMCURestoration(CodingStructure& cs, const UnitArea &ctuArea); - void xPCMSampleRestoration(CodingUnit& cu, const ComponentID compID); void xReconstructBlkSAOParams(CodingStructure& cs, SAOBlkParam* saoBlkParams); + bool isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader); + inline bool isProcessDisabled(int xPos, int yPos, int numVerVirBndry, int numHorVirBndry, int verVirBndryPos[], int horVirBndryPos[]) + { + bool bDisabledFlag = false; + for (int i = 0; i < numVerVirBndry; i++) + { + if ((xPos == verVirBndryPos[i]) || (xPos == verVirBndryPos[i] - 1)) + 
{ + bDisabledFlag = true; + break; + } + } + for (int i = 0; i < numHorVirBndry; i++) + { + if ((yPos == horVirBndryPos[i]) || (yPos == horVirBndryPos[i] - 1)) + { + bDisabledFlag = true; + break; + } + } + return bDisabledFlag; + } Reshape* m_pcReshape; protected: uint32_t m_offsetStepLog2[MAX_NUM_COMPONENT]; //offset step diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index f6c45dfc631bffffa8465aadf707495e003dc731..4281fecbeb0209d48fb0ccf6fd84590ff62ef433 100644 --- a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,97 +47,49 @@ //! \{ Slice::Slice() -: m_iPPSId ( -1 ) -, m_PicOutputFlag ( true ) -, m_iPOC ( 0 ) +: m_iPOC ( 0 ) , m_iLastIDR ( 0 ) , m_iAssociatedIRAP ( 0 ) , m_iAssociatedIRAPType ( NAL_UNIT_INVALID ) -, m_pRPS ( 0 ) -, m_localRPS ( ) -, m_rpsIdx ( 0 ) -, m_RefPicListModification ( ) +, m_rpl0Idx ( -1 ) +, m_rpl1Idx ( -1 ) , m_eNalUnitType ( NAL_UNIT_CODED_SLICE_IDR_W_RADL ) , m_eSliceType ( I_SLICE ) , m_iSliceQp ( 0 ) -#if HEVC_DEPENDENT_SLICES -, m_dependentSliceSegmentFlag ( false ) -#endif , m_ChromaQpAdjEnabled ( false ) , m_deblockingFilterDisable ( false ) , m_deblockingFilterOverrideFlag ( false ) , m_deblockingFilterBetaOffsetDiv2( 0 ) , m_deblockingFilterTcOffsetDiv2 ( 0 ) , m_pendingRasInit ( false ) -, m_depQuantEnabledFlag ( false ) -#if HEVC_USE_SIGN_HIDING -, m_signDataHidingEnabledFlag ( false ) -#endif , m_bCheckLDC ( false ) , m_biDirPred ( false ) , m_iSliceQpDelta ( 0 ) , m_iDepth ( 0 ) -#if HEVC_VPS -, m_pcVPS ( NULL ) -#endif +, m_dps ( nullptr ) , m_pcSPS ( NULL ) , m_pcPPS ( NULL ) , m_pcPic ( NULL ) +, m_pcPicHeader ( NULL ) , m_colFromL0Flag ( true ) -, 
m_noOutputPriorPicsFlag ( false ) -, m_noRaslOutputFlag ( false ) +, m_noIncorrectPicOutputFlag ( false ) , m_handleCraAsCvsStartFlag ( false ) , m_colRefIdx ( 0 ) -, m_maxNumMergeCand ( 0 ) -, m_maxNumAffineMergeCand ( 0 ) -, m_disFracMMVD ( false ) , m_uiTLayer ( 0 ) , m_bTLayerSwitchingFlag ( false ) -, m_sliceMode ( NO_SLICES ) -, m_sliceArgument ( 0 ) -, m_sliceCurStartCtuTsAddr ( 0 ) -, m_sliceCurEndCtuTsAddr ( 0 ) , m_independentSliceIdx ( 0 ) -#if HEVC_DEPENDENT_SLICES -, m_sliceSegmentIdx ( 0 ) -, m_sliceSegmentMode ( NO_SLICES ) -, m_sliceSegmentArgument ( 0 ) -, m_sliceSegmentCurStartCtuTsAddr ( 0 ) -, m_sliceSegmentCurEndCtuTsAddr ( 0 ) -#endif , m_nextSlice ( false ) -#if HEVC_DEPENDENT_SLICES -, m_nextSliceSegment ( false ) -#endif , m_sliceBits ( 0 ) -#if HEVC_DEPENDENT_SLICES -, m_sliceSegmentBits ( 0 ) -#endif , m_bFinalized ( false ) , m_bTestWeightPred ( false ) , m_bTestWeightBiPred ( false ) , m_substreamSizes ( ) +, m_numEntryPoints ( 0 ) , m_cabacInitFlag ( false ) -, m_bLMvdL1Zero ( false ) -#if !JVET_M0101_HLS -, m_temporalLayerNonReferenceFlag ( false ) -#endif -, m_LFCrossSliceBoundaryFlag ( false ) -, m_enableTMVPFlag ( true ) + , m_sliceSubPicId ( 0 ) , m_encCABACTableIdx (I_SLICE) , m_iProcessingStartTime ( 0 ) , m_dProcessingTime ( 0 ) -, m_splitConsOverrideFlag ( false ) -, m_uiMinQTSize ( 0 ) -, m_uiMaxBTDepth ( 0 ) -, m_uiMaxTTSize ( 0 ) -, m_uiMinQTSizeIChroma ( 0 ) -, m_uiMaxBTDepthIChroma ( 0 ) -, m_uiMaxBTSizeIChroma ( 0 ) -, m_uiMaxTTSizeIChroma ( 0 ) -, m_uiMaxBTSize ( 0 ) -, m_apsId ( -1 ) -, m_aps (NULL) { for(uint32_t i=0; i<NUM_REF_PIC_LIST_01; i++) { @@ -149,6 +101,7 @@ Slice::Slice() m_lambdas [component] = 0.0; m_iSliceChromaQpDelta[component] = 0; } + m_iSliceChromaQpDelta[JOINT_CbCr] = 0; initEqualRef(); @@ -174,16 +127,14 @@ Slice::Slice() m_saoEnabledFlag[ch] = false; } - m_sliceReshapeInfo.setUseSliceReshaper(false); - m_sliceReshapeInfo.setSliceReshapeModelPresentFlag(false); - 
m_sliceReshapeInfo.setSliceReshapeChromaAdj(0); - m_sliceReshapeInfo.reshaperModelMinBinIdx = 0; - m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1; - memset(m_sliceReshapeInfo.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int)); + memset(m_alfApss, 0, sizeof(m_alfApss)); + + m_sliceMap.initSliceMap(); } Slice::~Slice() { + m_sliceMap.initSliceMap(); } @@ -208,16 +159,79 @@ void Slice::initSlice() { m_iSliceChromaQpDelta[component] = 0; } + m_iSliceChromaQpDelta[JOINT_CbCr] = 0; - m_maxNumMergeCand = MRG_MAX_NUM_CANDS; - m_maxNumAffineMergeCand = AFFINE_MRG_MAX_NUM_CANDS; m_bFinalized=false; - m_disFracMMVD = false; m_substreamSizes.clear(); m_cabacInitFlag = false; - m_enableTMVPFlag = true; + m_enableDRAPSEI = false; + m_useLTforDRAP = false; + m_isDRAP = false; + m_latestDRAPPOC = MAX_INT; + resetTileGroupAlfEnabledFlag(); +} + +void Slice::inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SPS *sps ) +{ + if(picHeader->getPicRplPresentFlag()) + { + setRPL0idx( picHeader->getRPL0idx() ); + *getLocalRPL0() = *picHeader->getLocalRPL0(); + if(getRPL0idx() != -1) + { + setRPL0(sps->getRPLList0()->getReferencePictureList(getRPL0idx())); + } + else + { + setRPL0(getLocalRPL0()); + } + + setRPL1idx( picHeader->getRPL1idx() ); + *getLocalRPL1() = *picHeader->getLocalRPL1(); + if(getRPL1idx() != -1) + { + setRPL1(sps->getRPLList1()->getReferencePictureList(getRPL1idx())); + } + else + { + setRPL1(getLocalRPL1()); + } + } + + setDeblockingFilterDisable( picHeader->getDeblockingFilterDisable() ); + setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() ); + setDeblockingFilterTcOffsetDiv2( picHeader->getDeblockingFilterTcOffsetDiv2() ); + + setSaoEnabledFlag(CHANNEL_TYPE_LUMA, picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA)); + setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA)); + + setTileGroupAlfEnabledFlag(COMPONENT_Y, picHeader->getAlfEnabledFlag(COMPONENT_Y)); + 
setTileGroupAlfEnabledFlag(COMPONENT_Cb, picHeader->getAlfEnabledFlag(COMPONENT_Cb)); + setTileGroupAlfEnabledFlag(COMPONENT_Cr, picHeader->getAlfEnabledFlag(COMPONENT_Cr)); + setTileGroupNumAps(picHeader->getNumAlfAps()); + setAlfAPSs(picHeader->getAlfAPSs()); + setTileGroupApsIdChroma(picHeader->getAlfApsIdChroma()); +} + +void Slice::setNumEntryPoints( const PPS *pps ) +{ + uint32_t ctuAddr, ctuX, ctuY; + m_numEntryPoints = 0; + + // count the number of CTUs that align with either the start of a tile, or with an entropy coding sync point + // ignore the first CTU since it doesn't count as an entry point + for( uint32_t i = 1; i < m_sliceMap.getNumCtuInSlice(); i++ ) + { + ctuAddr = m_sliceMap.getCtuAddrInSlice( i ); + ctuX = ( ctuAddr % pps->getPicWidthInCtu() ); + ctuY = ( ctuAddr / pps->getPicWidthInCtu() ); + if( pps->ctuIsTileColBd( ctuX ) && (pps->ctuIsTileRowBd( ctuY ) || pps->getEntropyCodingSyncEnabledFlag() ) ) + { + m_numEntryPoints++; + } + } } void Slice::setDefaultClpRng( const SPS& sps ) @@ -237,11 +251,6 @@ bool Slice::getRapPicFlag() const { return getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP -#if !JVET_M0101_HLS - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP -#endif || getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA; } @@ -284,14 +293,14 @@ void Slice::sortPicList (PicList& rcListPic) } } -Picture* Slice::xGetRefPic (PicList& rcListPic, int poc) +Picture* Slice::xGetRefPic( PicList& rcListPic, int poc, const int layerId ) { PicList::iterator iterPic = rcListPic.begin(); Picture* pcPic = *(iterPic); while ( iterPic != rcListPic.end() ) { - if(pcPic->getPOC() == poc) + if( pcPic->getPOC() == poc && pcPic->layerId == layerId ) { break; } @@ -301,8 +310,7 @@ Picture* Slice::xGetRefPic (PicList& rcListPic, int poc) return pcPic; } - -Picture* Slice::xGetLongTermRefPic( 
PicList& rcListPic, int poc, bool pocHasMsb) +Picture* Slice::xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb, const int layerId ) { PicList::iterator iterPic = rcListPic.begin(); Picture* pcPic = *(iterPic); @@ -317,7 +325,7 @@ Picture* Slice::xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb) while ( iterPic != rcListPic.end() ) { pcPic = *(iterPic); - if (pcPic && pcPic->getPOC()!=this->getPOC() && pcPic->referenced) + if( pcPic && pcPic->getPOC() != this->getPOC() && pcPic->referenced && pcPic->layerId == layerId ) { int picPoc = pcPic->getPOC(); if (!pocHasMsb) @@ -374,181 +382,85 @@ void Slice::setList1IdxToList0Idx() } } -void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool bCopyL0toL1ErrorCase ) +void Slice::constructRefPicList(PicList& rcListPic) { - if ( m_eSliceType == I_SLICE) + ::memset(m_bIsUsedAsLongTerm, 0, sizeof(m_bIsUsedAsLongTerm)); + if (m_eSliceType == I_SLICE) { - ::memset( m_apcRefPicList, 0, sizeof (m_apcRefPicList)); - ::memset( m_aiNumRefIdx, 0, sizeof ( m_aiNumRefIdx )); - - if (!checkNumPocTotalCurr) - { - return; - } + ::memset(m_apcRefPicList, 0, sizeof(m_apcRefPicList)); + ::memset(m_aiNumRefIdx, 0, sizeof(m_aiNumRefIdx)); + return; } - Picture* pcRefPic= NULL; - static const uint32_t MAX_NUM_NEGATIVE_PICTURES=16; - Picture* RefPicSetStCurr0[MAX_NUM_NEGATIVE_PICTURES]; - Picture* RefPicSetStCurr1[MAX_NUM_NEGATIVE_PICTURES]; - Picture* RefPicSetLtCurr[MAX_NUM_NEGATIVE_PICTURES]; - uint32_t NumPicStCurr0 = 0; - uint32_t NumPicStCurr1 = 0; - uint32_t NumPicLtCurr = 0; - int i; + Picture* pcRefPic = NULL; + uint32_t numOfActiveRef = 0; + //construct L0 + numOfActiveRef = getNumRefIdx(REF_PIC_LIST_0); + int layerIdx = m_pcPic->cs->vps == nullptr ? 
0 : m_pcPic->cs->vps->getGeneralLayerIdx( m_pcPic->layerId ); - for(i=0; i < m_pRPS->getNumberOfNegativePictures(); i++) + for (int ii = 0; ii < numOfActiveRef; ii++) { - if(m_pRPS->getUsed(i)) + if( m_pRPL0->isInterLayerRefPic( ii ) ) { - pcRefPic = xGetRefPic(rcListPic, getPOC()+m_pRPS->getDeltaPOC(i)); - pcRefPic->longTerm = false; - pcRefPic->extendPicBorder(); - RefPicSetStCurr0[NumPicStCurr0] = pcRefPic; - NumPicStCurr0++; - } - } + CHECK( m_pRPL0->getInterLayerRefPicIdx( ii ) == NOT_VALID, "Wrong ILRP index" ); - for(; i < m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures(); i++) - { - if(m_pRPS->getUsed(i)) - { - pcRefPic = xGetRefPic(rcListPic, getPOC()+m_pRPS->getDeltaPOC(i)); - pcRefPic->longTerm = false; - pcRefPic->extendPicBorder(); - RefPicSetStCurr1[NumPicStCurr1] = pcRefPic; - NumPicStCurr1++; - } - } + int refLayerIdx = m_pcPic->cs->vps->getDirectRefLayerIdx( layerIdx, m_pRPL0->getInterLayerRefPicIdx( ii ) ); - for(i = m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures()+m_pRPS->getNumberOfLongtermPictures()-1; i > m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures()-1 ; i--) - { - if(m_pRPS->getUsed(i)) - { - pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i)); + pcRefPic = xGetRefPic( rcListPic, getPOC(), refLayerIdx ); pcRefPic->longTerm = true; - pcRefPic->extendPicBorder(); - RefPicSetLtCurr[NumPicLtCurr] = pcRefPic; - NumPicLtCurr++; } - if(pcRefPic==NULL) - { - pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i)); - } - } - // ref_pic_list_init - Picture* rpsCurrList0[MAX_NUM_REF+1]; - Picture* rpsCurrList1[MAX_NUM_REF+1]; - int numPicTotalCurr = NumPicStCurr0 + NumPicStCurr1 + NumPicLtCurr; - - if (checkNumPocTotalCurr) - { - // The variable NumPocTotalCurr is derived as specified in subclause 7.4.7.2. 
It is a requirement of bitstream conformance that the following applies to the value of NumPocTotalCurr: - // - If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0. - // - Otherwise, when the current picture contains a P or B slice, the value of NumPocTotalCurr shall not be equal to 0. - if (getRapPicFlag()) + else + if (!m_pRPL0->isRefPicLongterm(ii)) { - CHECK(numPicTotalCurr != 0, "Invalid state"); + pcRefPic = xGetRefPic( rcListPic, getPOC() - m_pRPL0->getRefPicIdentifier( ii ), m_pcPic->layerId ); + pcRefPic->longTerm = false; } - - if (m_eSliceType == I_SLICE) + else { - return; + int pocBits = getSPS()->getBitsForPOC(); + int pocMask = (1 << pocBits) - 1; + int ltrpPoc = m_pRPL0->getRefPicIdentifier(ii) & pocMask; + ltrpPoc += m_localRPL0.getDeltaPocMSBPresentFlag(ii) ? (pocMask + 1) * m_localRPL0.getDeltaPocMSBCycleLT(ii) : 0; + pcRefPic = xGetLongTermRefPic( rcListPic, ltrpPoc, m_localRPL0.getDeltaPocMSBPresentFlag( ii ), m_pcPic->layerId ); + pcRefPic->longTerm = true; } - - CHECK(numPicTotalCurr == 0, "Invalid state"); - // general tier and level limit: - CHECK(numPicTotalCurr > 8, "Invalid state"); + pcRefPic->extendPicBorder(); + m_apcRefPicList[REF_PIC_LIST_0][ii] = pcRefPic; + m_bIsUsedAsLongTerm[REF_PIC_LIST_0][ii] = pcRefPic->longTerm; } - int cIdx = 0; - for ( i=0; i<NumPicStCurr0; i++, cIdx++) + //construct L1 + numOfActiveRef = getNumRefIdx(REF_PIC_LIST_1); + for (int ii = 0; ii < numOfActiveRef; ii++) { - rpsCurrList0[cIdx] = RefPicSetStCurr0[i]; - } - for ( i=0; i<NumPicStCurr1; i++, cIdx++) - { - rpsCurrList0[cIdx] = RefPicSetStCurr1[i]; - } - for ( i=0; i<NumPicLtCurr; i++, cIdx++) - { - rpsCurrList0[cIdx] = RefPicSetLtCurr[i]; - } - CHECK(cIdx != numPicTotalCurr, "Invalid state"); - - if (m_eSliceType==B_SLICE) - { - cIdx = 0; - for ( i=0; i<NumPicStCurr1; i++, cIdx++) - { - rpsCurrList1[cIdx] = RefPicSetStCurr1[i]; - } - for ( i=0; i<NumPicStCurr0; i++, cIdx++) - { - rpsCurrList1[cIdx] = 
RefPicSetStCurr0[i]; - } - for ( i=0; i<NumPicLtCurr; i++, cIdx++) + if( m_pRPL1->isInterLayerRefPic( ii ) ) { - rpsCurrList1[cIdx] = RefPicSetLtCurr[i]; - } - CHECK(cIdx != numPicTotalCurr, "Invalid state"); - } + CHECK( m_pRPL1->getInterLayerRefPicIdx( ii ) == NOT_VALID, "Wrong ILRP index" ); - ::memset(m_bIsUsedAsLongTerm, 0, sizeof(m_bIsUsedAsLongTerm)); + int refLayerIdx = m_pcPic->cs->vps->getDirectRefLayerIdx( layerIdx, m_pRPL1->getInterLayerRefPicIdx( ii ) ); - for (int rIdx = 0; rIdx < m_aiNumRefIdx[REF_PIC_LIST_0]; rIdx ++) - { - cIdx = m_RefPicListModification.getRefPicListModificationFlagL0() ? m_RefPicListModification.getRefPicSetIdxL0(rIdx) : rIdx % numPicTotalCurr; - CHECK(cIdx < 0 || cIdx >= numPicTotalCurr, "Invalid state"); - m_apcRefPicList[REF_PIC_LIST_0][rIdx] = rpsCurrList0[ cIdx ]; - m_bIsUsedAsLongTerm[REF_PIC_LIST_0][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 ); - } - if ( m_eSliceType != B_SLICE ) - { - m_aiNumRefIdx[REF_PIC_LIST_1] = 0; - ::memset( m_apcRefPicList[REF_PIC_LIST_1], 0, sizeof(m_apcRefPicList[REF_PIC_LIST_1])); - } - else - { - for (int rIdx = 0; rIdx < m_aiNumRefIdx[REF_PIC_LIST_1]; rIdx ++) - { - cIdx = m_RefPicListModification.getRefPicListModificationFlagL1() ? 
m_RefPicListModification.getRefPicSetIdxL1(rIdx) : rIdx % numPicTotalCurr; - CHECK(cIdx < 0 || cIdx >= numPicTotalCurr, "Invalid state"); - m_apcRefPicList[REF_PIC_LIST_1][rIdx] = rpsCurrList1[ cIdx ]; - m_bIsUsedAsLongTerm[REF_PIC_LIST_1][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 ); + pcRefPic = xGetRefPic( rcListPic, getPOC(), refLayerIdx ); + pcRefPic->longTerm = true; } - } - // For generalized B - // note: maybe not existed case (always L0 is copied to L1 if L1 is empty) - if( bCopyL0toL1ErrorCase && isInterB() && getNumRefIdx(REF_PIC_LIST_1) == 0) - { - int iNumRefIdx = getNumRefIdx(REF_PIC_LIST_0); - setNumRefIdx( REF_PIC_LIST_1, iNumRefIdx ); - - for (int iRefIdx = 0; iRefIdx < iNumRefIdx; iRefIdx++) + else + if (!m_pRPL1->isRefPicLongterm(ii)) { - m_apcRefPicList[REF_PIC_LIST_1][iRefIdx] = m_apcRefPicList[REF_PIC_LIST_0] [iRefIdx]; + pcRefPic = xGetRefPic( rcListPic, getPOC() - m_pRPL1->getRefPicIdentifier( ii ), m_pcPic->layerId ); + pcRefPic->longTerm = false; } - } -} - - -int Slice::getNumRpsCurrTempList() const -{ - int numRpsCurrTempList = 0; - - if (m_eSliceType == I_SLICE) - { - return 0; - } - for(uint32_t i=0; i < m_pRPS->getNumberOfNegativePictures()+ m_pRPS->getNumberOfPositivePictures() + m_pRPS->getNumberOfLongtermPictures(); i++) - { - if(m_pRPS->getUsed(i)) + else { - numRpsCurrTempList++; + int pocBits = getSPS()->getBitsForPOC(); + int pocMask = (1 << pocBits) - 1; + int ltrpPoc = m_pRPL1->getRefPicIdentifier(ii) & pocMask; + ltrpPoc += m_localRPL1.getDeltaPocMSBPresentFlag(ii) ? 
(pocMask + 1) * m_localRPL1.getDeltaPocMSBCycleLT(ii) : 0; + pcRefPic = xGetLongTermRefPic( rcListPic, ltrpPoc, m_localRPL1.getDeltaPocMSBPresentFlag( ii ), m_pcPic->layerId ); + pcRefPic->longTerm = true; } + pcRefPic->extendPicBorder(); + m_apcRefPicList[REF_PIC_LIST_1][ii] = pcRefPic; + m_bIsUsedAsLongTerm[REF_PIC_LIST_1][ii] = pcRefPic->longTerm; } - return numRpsCurrTempList; } void Slice::initEqualRef() @@ -589,50 +501,108 @@ void Slice::checkColRefIdx(uint32_t curSliceSegmentIdx, const Picture* pic) } } -void Slice::checkCRA(const ReferencePictureSet *pReferencePictureSet, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic) +void Slice::checkCRA(const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic) { - for(int i = 0; i < pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures(); i++) + if (pocCRA < MAX_UINT && getPOC() > pocCRA) { - if(pocCRA < MAX_UINT && getPOC() > pocCRA) + uint32_t numRefPic = pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures(); + for (int i = 0; i < numRefPic; i++) { - CHECK(getPOC()+pReferencePictureSet->getDeltaPOC(i) < pocCRA, "Invalid state"); + if (!pRPL0->isRefPicLongterm(i)) + { + CHECK(getPOC() - pRPL0->getRefPicIdentifier(i) < pocCRA, "Invalid state"); + } + else + { + CHECK( xGetLongTermRefPic( rcListPic, pRPL0->getRefPicIdentifier( i ), pRPL0->getDeltaPocMSBPresentFlag( i ), m_pcPic->layerId )->getPOC() < pocCRA, "Invalid state" ); + } } - } - for(int i = pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures(); i < pReferencePictureSet->getNumberOfPictures(); i++) - { - if(pocCRA < MAX_UINT && getPOC() > pocCRA) + numRefPic = pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures(); + for (int i = 0; i < numRefPic; i++) { - if (!pReferencePictureSet->getCheckLTMSBPresent(i)) + if 
(!pRPL1->isRefPicLongterm(i)) { - CHECK(xGetLongTermRefPic(rcListPic, pReferencePictureSet->getPOC(i), false)->getPOC() < pocCRA, "Invalid state"); + CHECK(getPOC() - pRPL1->getRefPicIdentifier(i) < pocCRA, "Invalid state"); } - else + else if( !pRPL1->isInterLayerRefPic( i ) ) { - CHECK(pReferencePictureSet->getPOC(i) < pocCRA, "Invalid state"); + CHECK( xGetLongTermRefPic( rcListPic, pRPL1->getRefPicIdentifier( i ), pRPL1->getDeltaPocMSBPresentFlag( i ), m_pcPic->layerId )->getPOC() < pocCRA, "Invalid state" ); } } } - if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) // IDR picture found + if (getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP) // IDR picture found { pocCRA = getPOC(); associatedIRAPType = getNalUnitType(); } - else if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) // CRA picture found + else if (getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) // CRA picture found { pocCRA = getPOC(); associatedIRAPType = getNalUnitType(); } -#if !JVET_M0101_HLS - else if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP ) // BLA picture found +} + +void Slice::checkSTSA(PicList& rcListPic) +{ + int ii; + Picture* pcRefPic = NULL; + int numOfActiveRef = getNumRefIdx(REF_PIC_LIST_0); + + for (ii = 0; ii < numOfActiveRef; ii++) { - pocCRA = getPOC(); - associatedIRAPType = getNalUnitType(); + pcRefPic = m_apcRefPicList[REF_PIC_LIST_0][ii]; + + // Checking this: "When the current picture is an STSA picture, there shall be no active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that has TemporalId equal to that of the current picture" + if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA) + { + CHECK(pcRefPic->layer == m_uiTLayer, "When the current picture is an STSA picture, there shall be no active entry in the RPL that has TemporalId equal 
to that of the current picture"); + } + + // Checking this: "When the current picture is a picture that follows, in decoding order, an STSA picture that has TemporalId equal to that of the current picture, there shall be no + // picture that has TemporalId equal to that of the current picture included as an active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that precedes the STSA picture in decoding order." + CHECK(pcRefPic->subLayerNonReferencePictureDueToSTSA, "The RPL of the current picture contains a picture that is not allowed in this temporal layer due to an earlier STSA picture"); + } + + numOfActiveRef = getNumRefIdx(REF_PIC_LIST_1); + for (ii = 0; ii < numOfActiveRef; ii++) + { + pcRefPic = m_apcRefPicList[REF_PIC_LIST_1][ii]; + + // Checking this: "When the current picture is an STSA picture, there shall be no active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that has TemporalId equal to that of the current picture" + if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA) + { + CHECK(pcRefPic->layer == m_uiTLayer, "When the current picture is an STSA picture, there shall be no active entry in the RPL that has TemporalId equal to that of the current picture"); + } + + // Checking this: "When the current picture is a picture that follows, in decoding order, an STSA picture that has TemporalId equal to that of the current picture, there shall be no + // picture that has TemporalId equal to that of the current picture included as an active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that precedes the STSA picture in decoding order." + CHECK(pcRefPic->subLayerNonReferencePictureDueToSTSA, "The active RPL part of the current picture contains a picture that is not allowed in this temporal layer due to an earlier STSA picture"); + } + + // If the current picture is an STSA picture, make all reference pictures in the DPB with temporal + // id equal to the temproal id of the current picture sub-layer non-reference pictures. 
The flag + // subLayerNonReferencePictureDueToSTSA equal to true means that the picture may not be used for + // reference by a picture that follows the current STSA picture in decoding order + if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA) + { + PicList::iterator iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + pcRefPic = *(iterPic++); + if (!pcRefPic->referenced || pcRefPic->getPOC() == m_iPOC) + { + continue; + } + + if (pcRefPic->layer == m_uiTLayer) + { + pcRefPic->subLayerNonReferencePictureDueToSTSA = true; + } + } } -#endif } + /** Function for marking the reference pictures when an IDR/CRA/CRANT/BLA/BLANT is encountered. * \param pocCRA POC of the CRA/CRANT/BLA/BLANT picture * \param bRefreshPending flag indicating if a deferred decoding refresh is pending @@ -656,16 +626,8 @@ void Slice::decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList& Picture* rpcPic; int pocCurr = getPOC(); -#if !JVET_M0101_HLS - if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP - || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) // IDR or BLA picture -#else if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP) // IDR picture -#endif { // mark all pictures as not used for reference PicList::iterator iterPic = rcListPic.begin(); @@ -679,14 +641,6 @@ void Slice::decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList& } iterPic++; } -#if !JVET_M0101_HLS - if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP ) - { - pocCRA = pocCurr; - } -#endif if (bEfficientFieldIRAPEnabled) { bRefreshPending = true; @@ -776,6 +730,8 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) { 
m_iSliceChromaQpDelta[component] = pSrc->m_iSliceChromaQpDelta[component]; } + m_iSliceChromaQpDelta[JOINT_CbCr] = pSrc->m_iSliceChromaQpDelta[JOINT_CbCr]; + for (i = 0; i < NUM_REF_PIC_LIST_01; i++) { for (j = 0; j < MAX_NUM_REF; j++) @@ -789,11 +745,13 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) if( cpyAlmostAll ) m_iDepth = pSrc->m_iDepth; // access channel - if( cpyAlmostAll ) m_pRPS = pSrc->m_pRPS; + if (cpyAlmostAll) m_pRPL0 = pSrc->m_pRPL0; + if (cpyAlmostAll) m_pRPL1 = pSrc->m_pRPL1; m_iLastIDR = pSrc->m_iLastIDR; if( cpyAlmostAll ) m_pcPic = pSrc->m_pcPic; + m_pcPicHeader = pSrc->m_pcPicHeader; m_colFromL0Flag = pSrc->m_colFromL0Flag; m_colRefIdx = pSrc->m_colRefIdx; @@ -813,22 +771,9 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) m_uiTLayer = pSrc->m_uiTLayer; m_bTLayerSwitchingFlag = pSrc->m_bTLayerSwitchingFlag; - m_sliceMode = pSrc->m_sliceMode; - m_sliceArgument = pSrc->m_sliceArgument; - m_sliceCurStartCtuTsAddr = pSrc->m_sliceCurStartCtuTsAddr; - m_sliceCurEndCtuTsAddr = pSrc->m_sliceCurEndCtuTsAddr; + m_sliceMap = pSrc->m_sliceMap; m_independentSliceIdx = pSrc->m_independentSliceIdx; -#if HEVC_DEPENDENT_SLICES - m_sliceSegmentIdx = pSrc->m_sliceSegmentIdx; - m_sliceSegmentMode = pSrc->m_sliceSegmentMode; - m_sliceSegmentArgument = pSrc->m_sliceSegmentArgument; - m_sliceSegmentCurStartCtuTsAddr = pSrc->m_sliceSegmentCurStartCtuTsAddr; - m_sliceSegmentCurEndCtuTsAddr = pSrc->m_sliceSegmentCurEndCtuTsAddr; -#endif m_nextSlice = pSrc->m_nextSlice; -#if HEVC_DEPENDENT_SLICES - m_nextSliceSegment = pSrc->m_nextSliceSegment; -#endif m_clpRngs = pSrc->m_clpRngs; m_pendingRasInit = pSrc->m_pendingRasInit; @@ -846,28 +791,21 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll) } m_cabacInitFlag = pSrc->m_cabacInitFlag; + memcpy(m_alfApss, pSrc->m_alfApss, sizeof(m_alfApss)); // this might be quite unsafe + memcpy( m_tileGroupAlfEnabledFlag, pSrc->m_tileGroupAlfEnabledFlag, sizeof(m_tileGroupAlfEnabledFlag)); + 
m_tileGroupNumAps = pSrc->m_tileGroupNumAps; + m_tileGroupLumaApsId = pSrc->m_tileGroupLumaApsId; + m_tileGroupChromaApsId = pSrc->m_tileGroupChromaApsId; + m_disableSATDForRd = pSrc->m_disableSATDForRd; - m_bLMvdL1Zero = pSrc->m_bLMvdL1Zero; - m_LFCrossSliceBoundaryFlag = pSrc->m_LFCrossSliceBoundaryFlag; - m_enableTMVPFlag = pSrc->m_enableTMVPFlag; - m_maxNumMergeCand = pSrc->m_maxNumMergeCand; - m_maxNumAffineMergeCand = pSrc->m_maxNumAffineMergeCand; - m_disFracMMVD = pSrc->m_disFracMMVD; if( cpyAlmostAll ) m_encCABACTableIdx = pSrc->m_encCABACTableIdx; - m_splitConsOverrideFlag = pSrc->m_splitConsOverrideFlag; - m_uiMinQTSize = pSrc->m_uiMinQTSize; - m_uiMaxBTDepth = pSrc->m_uiMaxBTDepth; - m_uiMaxTTSize = pSrc->m_uiMaxTTSize; - m_uiMinQTSizeIChroma = pSrc->m_uiMinQTSizeIChroma; - m_uiMaxBTDepthIChroma = pSrc->m_uiMaxBTDepthIChroma; - m_uiMaxBTSizeIChroma = pSrc->m_uiMaxBTSizeIChroma; - m_uiMaxTTSizeIChroma = pSrc->m_uiMaxTTSizeIChroma; - m_uiMaxBTSize = pSrc->m_uiMaxBTSize; - - m_depQuantEnabledFlag = pSrc->m_depQuantEnabledFlag; - m_signDataHidingEnabledFlag = pSrc->m_signDataHidingEnabledFlag; - - m_sliceReshapeInfo = pSrc->m_sliceReshapeInfo; + for( int i = 0; i < NUM_REF_PIC_LIST_01; i ++ ) + { + for (int j = 0; j < MAX_NUM_REF_PICS; j ++ ) + { + m_scalingRatio[i][j] = pSrc->m_scalingRatio[i][j]; + } + } } @@ -899,7 +837,7 @@ bool Slice::isStepwiseTemporalLayerSwitchingPointCandidate(PicList& rcListPic) c while ( iterPic != rcListPic.end()) { const Picture* pcPic = *(iterPic++); - if( pcPic->referenced && pcPic->usedByCurr && pcPic->poc != getPOC()) + if( pcPic->referenced && pcPic->poc != getPOC()) { if( pcPic->layer >= getTLayer()) { @@ -919,59 +857,25 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const if(this->getAssociatedIRAPPOC() > this->getPOC()) { // Do not check IRAP pictures since they may get a POC lower than their associated IRAP -#if !JVET_M0101_HLS - if(nalUnitType < NAL_UNIT_CODED_SLICE_BLA_W_LP || - nalUnitType > 
NAL_UNIT_RESERVED_IRAP_VCL23) -#else if (nalUnitType < NAL_UNIT_CODED_SLICE_IDR_W_RADL || - nalUnitType > NAL_UNIT_RESERVED_IRAP_VCL13) -#endif + nalUnitType > NAL_UNIT_CODED_SLICE_CRA) { -#if !JVET_M0101_HLS - CHECK( nalUnitType != NAL_UNIT_CODED_SLICE_RASL_N && - nalUnitType != NAL_UNIT_CODED_SLICE_RASL_R && - nalUnitType != NAL_UNIT_CODED_SLICE_RADL_N && - nalUnitType != NAL_UNIT_CODED_SLICE_RADL_R, "Invalid NAL unit type"); -#else CHECK(nalUnitType != NAL_UNIT_CODED_SLICE_RASL && nalUnitType != NAL_UNIT_CODED_SLICE_RADL, "Invalid NAL unit type"); -#endif } } // When a picture is a trailing picture, it shall not be a RADL or RASL picture. if(this->getAssociatedIRAPPOC() < this->getPOC()) { -#if !JVET_M0101_HLS - CHECK( nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R, "Invalid NAL unit type" ); -#else CHECK(nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL, "Invalid NAL unit type"); -#endif } -#if !JVET_M0101_HLS - // No RASL pictures shall be present in the bitstream that are associated - // with a BLA picture having nal_unit_type equal to BLA_W_RADL or BLA_N_LP. - if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R) - { - CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL || - this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP, "Invalid NAL unit type"); - } -#endif // No RASL pictures shall be present in the bitstream that are associated with // an IDR picture. 
-#if !JVET_M0101_HLS - if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R) -#else if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL) -#endif { CHECK( this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL, "Invalid NAL unit type"); @@ -980,23 +884,14 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const // No RADL pictures shall be present in the bitstream that are associated with // a BLA picture having nal_unit_type equal to BLA_N_LP or that are associated // with an IDR picture having nal_unit_type equal to IDR_N_LP. -#if !JVET_M0101_HLS - if(nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R) -#else if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL) -#endif { -#if !JVET_M0101_HLS - CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP || - this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP, "Invalid NAL unit type"); -#else CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP, "Invalid NAL unit type"); -#endif } // loop through all pictures in the reference picture buffer PicList::iterator iterPic = rcListPic.begin(); + int numLeadingPicsFound = 0; while ( iterPic != rcListPic.end()) { Picture* pcPic = *(iterPic++); @@ -1013,20 +908,11 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const // Any picture that has PicOutputFlag equal to 1 that precedes an IRAP picture // in decoding order shall precede the IRAP picture in output order. 
// (Note that any picture following in output order would be present in the DPB) - if(pcSlice->getPicOutputFlag() == 1 && !this->getNoOutputPriorPicsFlag()) - { -#if !JVET_M0101_HLS - if(nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP || - nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP || - nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL || - nalUnitType == NAL_UNIT_CODED_SLICE_CRA || - nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || - nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) -#else + if(pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getPicHeader()->getNoOutputOfPriorPicsFlag()) + { if (nalUnitType == NAL_UNIT_CODED_SLICE_CRA || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) -#endif { CHECK(pcPic->poc >= this->getPOC(), "Invalid POC"); } @@ -1035,14 +921,9 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const // Any picture that has PicOutputFlag equal to 1 that precedes an IRAP picture // in decoding order shall precede any RADL picture associated with the IRAP // picture in output order. - if(pcSlice->getPicOutputFlag() == 1) + if(pcSlice->getPicHeader()->getPicOutputFlag() == 1) { -#if !JVET_M0101_HLS - if((nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R)) -#else if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL) -#endif { // rpcPic precedes the IRAP in decoding order if(this->getAssociatedIRAPPOC() > pcSlice->getAssociatedIRAPPOC()) @@ -1058,49 +939,27 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const // When a picture is a leading picture, it shall precede, in decoding order, // all trailing pictures that are associated with the same IRAP picture. 
-#if !JVET_M0101_HLS - if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R) -#else - if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL || - nalUnitType == NAL_UNIT_CODED_SLICE_RADL ) -#endif + if ((nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL) && + (pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL && pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RADL) ) + { + if (pcSlice->getAssociatedIRAPPOC() == this->getAssociatedIRAPPOC()) { - if(pcSlice->getAssociatedIRAPPOC() == this->getAssociatedIRAPPOC()) - { - // rpcPic is a picture that preceded the leading in decoding order since it exist in the DPB - // rpcPic would violate the constraint if it was a trailing picture - CHECK( pcPic->poc > this->getAssociatedIRAPPOC(), "Invalid POC"); - } + numLeadingPicsFound++; + int limitNonLP = 0; + if (pcSlice->getSPS()->getVuiParameters() && pcSlice->getSPS()->getVuiParameters()->getFieldSeqFlag()) + limitNonLP = 1; + CHECK(pcPic->poc > this->getAssociatedIRAPPOC() && numLeadingPicsFound > limitNonLP, "Invalid POC"); } + } // Any RASL picture associated with a CRA or BLA picture shall precede any // RADL picture associated with the CRA or BLA picture in output order -#if !JVET_M0101_HLS - if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R) -#else if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL) -#endif { -#if !JVET_M0101_HLS - if((this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP || - this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_LP || - this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL || - this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA) && -#else if ((this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA) && -#endif this->getAssociatedIRAPPOC() == pcSlice->getAssociatedIRAPPOC()) { -#if !JVET_M0101_HLS 
- if(pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N || - pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_R) -#else if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL) -#endif { CHECK( pcPic->poc <= this->getPOC(), "Invalid POC"); } @@ -1109,22 +968,12 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const // Any RASL picture associated with a CRA picture shall follow, in output // order, any IRAP picture that precedes the CRA picture in decoding order. -#if !JVET_M0101_HLS - if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N || - nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R) -#else if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL) -#endif { if(this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA) { - if(pcSlice->getPOC() < this->getAssociatedIRAPPOC() && + if(pcSlice->getPOC() < this->getAssociatedIRAPPOC() && ( -#if !JVET_M0101_HLS - pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP || - pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP || - pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL || -#endif pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)) @@ -1138,455 +987,409 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const -/** Function for applying picture marking based on the Reference Picture Set in pReferencePictureSet. 
-*/ -void Slice::applyReferencePictureSet( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet) const + +//Function for applying picture marking based on the Reference Picture List +void Slice::applyReferencePictureListBasedMarking( PicList& rcListPic, const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, const int layerId ) const { int i, isReference; - checkLeadingPictureRestrictions(rcListPic); + bool isNeedToCheck = (this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL) ? false : true; + // loop through all pictures in the reference picture buffer PicList::iterator iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + while (iterPic != rcListPic.end()) { Picture* pcPic = *(iterPic++); - if( ! pcPic->referenced) - { + if (!pcPic->referenced) continue; - } isReference = 0; // loop through all pictures in the Reference Picture Set // to see if the picture should be kept as reference picture - for(i=0;i<pReferencePictureSet->getNumberOfPositivePictures()+pReferencePictureSet->getNumberOfNegativePictures();i++) + for( i = 0; isNeedToCheck && !isReference && i < pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures() + pRPL0->getNumberOfInterLayerPictures(); i++ ) { - if( ! 
pcPic->longTerm && pcPic->poc == this->getPOC() + pReferencePictureSet->getDeltaPOC(i)) + if( pRPL0->isInterLayerRefPic( i ) ) + { + // Diagonal inter-layer prediction is not allowed + CHECK( pRPL0->getRefPicIdentifier( i ), "ILRP identifier should be 0" ); + + if( pcPic->poc == m_iPOC ) + { + isReference = 1; + pcPic->longTerm = true; + } + } + else if (pcPic->layerId == layerId) + { + if (!(pRPL0->isRefPicLongterm(i))) + { + if (pcPic->poc == this->getPOC() - pRPL0->getRefPicIdentifier(i)) + { + isReference = 1; + pcPic->longTerm = false; + } + } + else { - isReference = 1; - pcPic->usedByCurr = pReferencePictureSet->getUsed(i); - pcPic->longTerm = false; + int pocCycle = 1 << (pcPic->cs->sps->getBitsForPOC()); + int curPoc = pcPic->poc & (pocCycle - 1); + if (pcPic->longTerm && curPoc == pRPL0->getRefPicIdentifier(i)) + { + isReference = 1; + pcPic->longTerm = true; + } + } } } - for(;i<pReferencePictureSet->getNumberOfPictures();i++) + + for( i = 0; isNeedToCheck && !isReference && i < pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures() + pRPL1->getNumberOfInterLayerPictures(); i++ ) { - if(pReferencePictureSet->getCheckLTMSBPresent(i)==true) + if( pRPL1->isInterLayerRefPic( i ) ) + { + // Diagonal inter-layer prediction is not allowed + CHECK( pRPL1->getRefPicIdentifier( i ), "ILRP identifier should be 0" ); + + if( pcPic->poc == m_iPOC ) + { + isReference = 1; + pcPic->longTerm = true; + } + } + else if( pcPic->layerId == layerId ) { - if( pcPic->longTerm && pcPic->poc == pReferencePictureSet->getPOC(i)) + if (!(pRPL1->isRefPicLongterm(i))) + { + if (pcPic->poc == this->getPOC() - pRPL1->getRefPicIdentifier(i)) { isReference = 1; - pcPic->usedByCurr = pReferencePictureSet->getUsed(i); + pcPic->longTerm = false; } } else { - int pocCycle = 1 << pcPic->cs->sps->getBitsForPOC(); - int curPoc = pcPic->poc & (pocCycle-1); - int refPoc = pReferencePictureSet->getPOC(i) & (pocCycle-1); - if( pcPic->longTerm && curPoc == refPoc) + int 
pocCycle = 1 << (pcPic->cs->sps->getBitsForPOC()); + int curPoc = pcPic->poc & (pocCycle - 1); + if (pcPic->longTerm && curPoc == pRPL1->getRefPicIdentifier(i)) { isReference = 1; - pcPic->usedByCurr = pReferencePictureSet->getUsed(i); + pcPic->longTerm = true; } } + } } // mark the picture as "unused for reference" if it is not in - // the Reference Picture Set - if( pcPic->poc != this->getPOC() && isReference == 0) + // the Reference Picture List + if( pcPic->layerId == layerId && pcPic->poc != m_iPOC && isReference == 0 ) { pcPic->referenced = false; - pcPic->usedByCurr = false; - pcPic->longTerm = false; - pcPic->getHashMap()->clearAll(); + pcPic->longTerm = false; } // sanity checks - if( pcPic->referenced) + if (pcPic->referenced) { //check that pictures of higher temporal layers are not used - CHECK( pcPic->usedByCurr && !(pcPic->layer<=this->getTLayer()), "Invalid state"); - //check that pictures of higher or equal temporal layer are not in the RPS if the current picture is a TSA picture -#if !JVET_M0101_HLS - if( this->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_R || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_N) - { - CHECK( !(pcPic->layer<this->getTLayer()), "Invalid state"); - } -#endif -#if !JVET_M0101_HLS - //check that pictures marked as temporal layer non-reference pictures are not used for reference - if( pcPic->poc != this->getPOC() && (pcPic->layer == this->getTLayer())) - { - CHECK( pcPic->usedByCurr && pcPic->slices[0]->getTemporalLayerNonReferenceFlag(), "Invalid state"); - } -#endif + CHECK(pcPic->usedByCurr && !(pcPic->layer <= this->getTLayer()), "Invalid state"); } } } -/** Function for applying picture marking based on the Reference Picture Set in pReferencePictureSet. 
-*/ -int Slice::checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess, bool bUseRecoveryPoint) const +int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors) const { - int atLeastOneUnabledByRecoveryPoint = 0; - int atLeastOneFlushedByPreviousIDR = 0; Picture* rpcPic; - int i, isAvailable; - int atLeastOneLost = 0; - int atLeastOneRemoved = 0; - int iPocLost = 0; + int isAvailable = 0; + int notPresentPoc = 0; + + if (this->isIDRorBLA()) return 0; //Assume that all pic in the DPB will be flushed anyway so no need to check. - // loop through all long-term pictures in the Reference Picture Set - // to see if the picture should be kept as reference picture - for(i=pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures();i<pReferencePictureSet->getNumberOfPictures();i++) + int numberOfPictures = pRPL->getNumberOfLongtermPictures() + pRPL->getNumberOfShorttermPictures() + pRPL->getNumberOfInterLayerPictures(); + //Check long term ref pics + for (int ii = 0; pRPL->getNumberOfLongtermPictures() > 0 && ii < numberOfPictures; ii++) { + if( !pRPL->isRefPicLongterm( ii ) || pRPL->isInterLayerRefPic( ii ) ) + { + continue; + } + + notPresentPoc = pRPL->getRefPicIdentifier(ii); isAvailable = 0; - // loop through all pictures in the reference picture buffer PicList::iterator iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + while (iterPic != rcListPic.end()) { rpcPic = *(iterPic++); - if(pReferencePictureSet->getCheckLTMSBPresent(i)==true) + int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC()); + int curPoc = rpcPic->getPOC() & (pocCycle - 1); + int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1); + if (rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced) { - if(rpcPic->longTerm && (rpcPic->getPOC()) == pReferencePictureSet->getPOC(i) && 
rpcPic->referenced) - { - if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) - { - isAvailable = 0; - } - else - { - isAvailable = 1; - } - } + isAvailable = 1; + break; } - else + } + // if there was no such long-term check the short terms + if (!isAvailable) + { + iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) { - int pocCycle = 1<<rpcPic->cs->sps->getBitsForPOC(); - int curPoc = rpcPic->getPOC() & (pocCycle-1); - int refPoc = pReferencePictureSet->getPOC(i) & (pocCycle-1); - if(rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced) + rpcPic = *(iterPic++); + int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC()); + int curPoc = rpcPic->getPOC() & (pocCycle - 1); + int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1); + if (!rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced) { - if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) - { - isAvailable = 0; - } - else - { - isAvailable = 1; - } + isAvailable = 1; + rpcPic->longTerm = true; + break; } } } - // if there was no such long-term check the short terms - if(!isAvailable) + if (!isAvailable) { - iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + if (printErrors) { - rpcPic = *(iterPic++); - - int pocCycle = 1 << rpcPic->cs->sps->getBitsForPOC(); - int curPoc = rpcPic->getPOC(); - int refPoc = pReferencePictureSet->getPOC(i); - if (!pReferencePictureSet->getCheckLTMSBPresent(i)) - { - curPoc = curPoc & (pocCycle - 1); - refPoc = refPoc & (pocCycle - 1); - } - - if (rpcPic->referenced && curPoc == refPoc) - { - if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) - { - isAvailable = 0; - } - else - { - isAvailable = 1; - rpcPic->longTerm = true; - break; - } - } + msg(ERROR, "\nCurrent picture: %d Long-term reference 
picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc); } + return notPresentPoc; } - // report that a picture is lost if it is in the Reference Picture Set - // but not available as reference picture - if(isAvailable == 0) + } + //report that a picture is lost if it is in the Reference Picture List but not in the DPB + + isAvailable = 0; + //Check short term ref pics + for (int ii = 0; ii < numberOfPictures; ii++) + { + if (pRPL->isRefPicLongterm(ii)) + continue; + + notPresentPoc = this->getPOC() - pRPL->getRefPicIdentifier(ii); + isAvailable = 0; + PicList::iterator iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) { - if (this->getPOC() + pReferencePictureSet->getDeltaPOC(i) >= pocRandomAccess) - { - if(!pReferencePictureSet->getUsed(i) ) - { - if(printErrors) - { - msg( ERROR, "\nLong-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC() + pReferencePictureSet->getDeltaPOC(i)); - } - atLeastOneRemoved = 1; - } - else - { - if(printErrors) - { - msg( ERROR, "\nLong-term reference picture with POC = %3d is lost or not correctly decoded!", this->getPOC() + pReferencePictureSet->getDeltaPOC(i)); - } - atLeastOneLost = 1; - iPocLost=this->getPOC() + pReferencePictureSet->getDeltaPOC(i); - } - } - else if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess) + rpcPic = *(iterPic++); + if (!rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() - pRPL->getRefPicIdentifier(ii) && rpcPic->referenced) { - atLeastOneUnabledByRecoveryPoint = 1; + isAvailable = 1; + break; } - else if(bUseRecoveryPoint && (this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_W_RADL)) + } + //report that a picture is lost if it is in the Reference Picture List but not in the DPB + if (isAvailable == 0 && pRPL->getNumberOfShorttermPictures() > 0) + { + if (printErrors) { - atLeastOneFlushedByPreviousIDR 
= 1; + msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc); } + return notPresentPoc; } } - // loop through all short-term pictures in the Reference Picture Set - // to see if the picture should be kept as reference picture - for(i=0;i<pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures();i++) + return 0; +} + +int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors, int *refPicIndex) const +{ + Picture* rpcPic; + int isAvailable = 0; + int notPresentPoc = 0; + *refPicIndex = 0; + + if (this->isIDRorBLA()) return 0; //Assume that all pic in the DPB will be flushed anyway so no need to check. + + int numberOfPictures = pRPL->getNumberOfLongtermPictures() + pRPL->getNumberOfShorttermPictures() + pRPL->getNumberOfInterLayerPictures(); + //Check long term ref pics + for (int ii = 0; pRPL->getNumberOfLongtermPictures() > 0 && ii < numberOfPictures; ii++) { + if( !pRPL->isRefPicLongterm( ii ) || pRPL->isInterLayerRefPic( ii ) ) + { + continue; + } + + notPresentPoc = pRPL->getRefPicIdentifier(ii); isAvailable = 0; - // loop through all pictures in the reference picture buffer PicList::iterator iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + while (iterPic != rcListPic.end()) { rpcPic = *(iterPic++); - - if( ! 
rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced) + int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC()); + int curPoc = rpcPic->getPOC() & (pocCycle - 1); + int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1); + if (rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced) { - if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) - { - isAvailable = 0; - } - else + isAvailable = 1; + break; + } + } + // if there was no such long-term check the short terms + if (!isAvailable) + { + iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + rpcPic = *(iterPic++); + int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC()); + int curPoc = rpcPic->getPOC() & (pocCycle - 1); + int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1); + if (!rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced) { isAvailable = 1; + rpcPic->longTerm = true; + break; } } } - // report that a picture is lost if it is in the Reference Picture Set - // but not available as reference picture - if(isAvailable == 0) + if (!isAvailable) { - if (this->getPOC() + pReferencePictureSet->getDeltaPOC(i) >= pocRandomAccess) + if (printErrors) { - if(!pReferencePictureSet->getUsed(i) ) - { - if(printErrors) - { - msg( ERROR, "\nShort-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC() + pReferencePictureSet->getDeltaPOC(i)); - } - atLeastOneRemoved = 1; - } - else - { - if(printErrors) - { - msg( ERROR, "\nShort-term reference picture with POC = %3d is lost or not correctly decoded!", this->getPOC() + pReferencePictureSet->getDeltaPOC(i)); - } - atLeastOneLost = 1; - iPocLost=this->getPOC() + pReferencePictureSet->getDeltaPOC(i); - } + msg(ERROR, "\nCurrent picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", 
this->getPOC(), notPresentPoc); } - else if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess) + *refPicIndex = ii; + return notPresentPoc; + } + } + //report that a picture is lost if it is in the Reference Picture List but not in the DPB + + isAvailable = 0; + //Check short term ref pics + for (int ii = 0; ii < numberOfPictures; ii++) + { + if (pRPL->isRefPicLongterm(ii)) + continue; + + notPresentPoc = this->getPOC() - pRPL->getRefPicIdentifier(ii); + isAvailable = 0; + PicList::iterator iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + rpcPic = *(iterPic++); + if (!rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() - pRPL->getRefPicIdentifier(ii) && rpcPic->referenced) { - atLeastOneUnabledByRecoveryPoint = 1; + isAvailable = 1; + break; } - else if(bUseRecoveryPoint && (this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_W_RADL)) + } + //report that a picture is lost if it is in the Reference Picture List but not in the DPB + if (isAvailable == 0 && pRPL->getNumberOfShorttermPictures() > 0) + { + if (printErrors) { - atLeastOneFlushedByPreviousIDR = 1; + msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc); } + *refPicIndex = ii; + return notPresentPoc; } } + return 0; +} - if(atLeastOneUnabledByRecoveryPoint || atLeastOneFlushedByPreviousIDR) - { - return -1; - } - if(atLeastOneLost) - { - return iPocLost+1; - } - if(atLeastOneRemoved) +bool Slice::isPOCInRefPicList(const ReferencePictureList *rpl, int poc ) +{ + for( int i = 0; i < rpl->getNumberOfLongtermPictures() + rpl->getNumberOfShorttermPictures() + rpl->getNumberOfInterLayerPictures(); i++ ) { - return -2; + if( rpl->isInterLayerRefPic( i ) ) + { + // Diagonal inter-layer prediction is not allowed + CHECK( rpl->getRefPicIdentifier( i ), "ILRP identifier should be 0" ); + + if( poc == m_iPOC ) + { 
+ return true; + } + } + else + if (rpl->isRefPicLongterm(i)) + { + if (poc == rpl->getRefPicIdentifier(i)) + { + return true; + } + } + else + { + if (poc == getPOC() - rpl->getRefPicIdentifier(i)) + { + return true; + } + } } - else + return false; +} + +bool Slice::isPocRestrictedByDRAP( int poc, bool precedingDRAPInDecodingOrder ) +{ + if (!getEnableDRAPSEI()) { - return 0; + return false; } + return ( isDRAP() && poc != getAssociatedIRAPPOC() ) || + ( cvsHasPreviousDRAP() && getPOC() > getLatestDRAPPOC() && (precedingDRAPInDecodingOrder || poc < getLatestDRAPPOC()) ); } -/** Function for constructing an explicit Reference Picture Set out of the available pictures in a referenced Reference Picture Set -*/ -void Slice::createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled - , bool isEncodeLtRef, bool isCompositeRefEnable -) +void Slice::checkConformanceForDRAP( uint32_t temporalId ) { - Picture* rpcPic; - int i, j; - int k = 0; - int nrOfNegativePictures = 0; - int nrOfPositivePictures = 0; - ReferencePictureSet* pLocalRPS = this->getLocalRPS(); - (*pLocalRPS)=ReferencePictureSet(); - - bool irapIsInRPS = false; // Used when bEfficientFieldIRAPEnabled==true + if (!(isDRAP() || cvsHasPreviousDRAP())) + { + return; + } - // loop through all pictures in the Reference Picture Set - for(i=0;i<pReferencePictureSet->getNumberOfPictures();i++) + if (isDRAP()) { - j = 0; - // loop through all pictures in the reference picture buffer - PicList::iterator iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + if (!(getNalUnitType() == NalUnitType::NAL_UNIT_CODED_SLICE_TRAIL || + getNalUnitType() == NalUnitType::NAL_UNIT_CODED_SLICE_STSA)) { - j++; - rpcPic = *(iterPic++); - - if(rpcPic->getPOC() == this->getPOC() + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced) + msg( WARNING, "Warning, 
non-conforming bitstream. The DRAP picture should be a trailing picture.\n"); + } + if ( temporalId != 0) + { + msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall have a temporal sublayer identifier equal to 0.\n"); + } + for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_0); i++) + { + if (getRefPic(REF_PIC_LIST_0,i)->getPOC() != getAssociatedIRAPPOC()) { - // This picture exists as a reference picture - // and should be added to the explicit Reference Picture Set - pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i)); - pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP)); - if (bEfficientFieldIRAPEnabled) - { - pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) ); - } - - if(pLocalRPS->getDeltaPOC(k) < 0) - { - nrOfNegativePictures++; - } - else - { - if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1)) - { - irapIsInRPS = true; - } - nrOfPositivePictures++; - } - k++; + msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall not include any pictures in the active " + "entries of its reference picture lists except the preceding IRAP picture in decoding order.\n"); } } - } - - bool useNewRPS = false; - // if current picture is complimentary field associated to IRAP, add the IRAP to its RPS. - if(bEfficientFieldIRAPEnabled && m_pcPic->fieldPic && !irapIsInRPS) - { - PicList::iterator iterPic = rcListPic.begin(); - while ( iterPic != rcListPic.end()) + for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_1); i++) { - rpcPic = *(iterPic++); - if (rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 
2 : 1)) + if (getRefPic(REF_PIC_LIST_1,i)->getPOC() != getAssociatedIRAPPOC()) { - pLocalRPS->setDeltaPOC(k, 1); - pLocalRPS->setUsed(k, true); - nrOfPositivePictures++; - k ++; - useNewRPS = true; + msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall not include any pictures in the active " + "entries of its reference picture lists except the preceding IRAP picture in decoding order.\n"); } } } - if (isCompositeRefEnable && isEncodeLtRef) + + if (cvsHasPreviousDRAP() && getPOC() > getLatestDRAPPOC()) { - useNewRPS = true; - nrOfNegativePictures = 0; - nrOfPositivePictures = 0; - for (i = 0; i<pReferencePictureSet->getNumberOfPictures(); i++) + for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_0); i++) { - j = 0; - k = 0; - - // loop through all pictures in the reference picture buffer - PicList::iterator iterPic = rcListPic.begin(); - while (iterPic != rcListPic.end()) + if (getRefPic(REF_PIC_LIST_0,i)->getPOC() < getLatestDRAPPOC() && getRefPic(REF_PIC_LIST_0,i)->getPOC() != getAssociatedIRAPPOC()) { - j++; - rpcPic = *(iterPic++); - - if (rpcPic->getPOC() == this->getPOC() + 1 + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced) - { - // This picture exists as a reference picture - // and should be added to the explicit Reference Picture Set - pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i) + 1); - pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP)); - if (bEfficientFieldIRAPEnabled) - { - pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) + 1 < pocRandomAccess)); - } - - if (pLocalRPS->getDeltaPOC(k) < 0) - { - nrOfNegativePictures++; - } - else - { - if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + 2) - { - irapIsInRPS = true; - } - nrOfPositivePictures++; - } - k++; - } + msg( WARNING, "Warning, non-conforming 
bitstream. Any picture that follows the DRAP picture in both decoding order " + "and output order shall not include, in the active entries of its reference picture lists, any picture " + "that precedes the DRAP picture in decoding order or output order, with the exception of the preceding " + "IRAP picture in decoding order. Problem is POC %d in RPL0.\n", getRefPic(REF_PIC_LIST_0,i)->getPOC()); } } - } - pLocalRPS->setNumberOfNegativePictures(nrOfNegativePictures); - pLocalRPS->setNumberOfPositivePictures(nrOfPositivePictures); - pLocalRPS->setNumberOfPictures(nrOfNegativePictures+nrOfPositivePictures); - // This is a simplistic inter rps example. A smarter encoder will look for a better reference RPS to do the - // inter RPS prediction with. Here we just use the reference used by pReferencePictureSet. - // If pReferencePictureSet is not inter_RPS_predicted, then inter_RPS_prediction is for the current RPS also disabled. - if (!pReferencePictureSet->getInterRPSPrediction() || useNewRPS ) - { - pLocalRPS->setInterRPSPrediction(false); - pLocalRPS->setNumRefIdc(0); - } - else - { - int rIdx = this->getRPSidx() - pReferencePictureSet->getDeltaRIdxMinus1() - 1; - int deltaRPS = pReferencePictureSet->getDeltaRPS(); - const ReferencePictureSet* pcRefRPS = this->getSPS()->getRPSList()->getReferencePictureSet(rIdx); - int iRefPics = pcRefRPS->getNumberOfPictures(); - int iNewIdc=0; - for(i=0; i<= iRefPics; i++) + for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_1); i++) { - int deltaPOC = ((i != iRefPics)? 
pcRefRPS->getDeltaPOC(i) : 0); // check if the reference abs POC is >= 0 - int iRefIdc = 0; - for (j=0; j < pLocalRPS->getNumberOfPictures(); j++) // loop through the pictures in the new RPS + if (getRefPic(REF_PIC_LIST_1,i)->getPOC() < getLatestDRAPPOC() && getRefPic(REF_PIC_LIST_1,i)->getPOC() != getAssociatedIRAPPOC()) { - if ( (deltaPOC + deltaRPS) == pLocalRPS->getDeltaPOC(j)) - { - if (pLocalRPS->getUsed(j)) - { - iRefIdc = 1; - } - else - { - iRefIdc = 2; - } - } + msg( WARNING, "Warning, non-conforming bitstream. Any picture that follows the DRAP picture in both decoding order " + "and output order shall not include, in the active entries of its reference picture lists, any picture " + "that precedes the DRAP picture in decoding order or output order, with the exception of the preceding " + "IRAP picture in decoding order. Problem is POC %d in RPL1", getRefPic(REF_PIC_LIST_1,i)->getPOC()); } - pLocalRPS->setRefIdc(i, iRefIdc); - iNewIdc++; } - pLocalRPS->setInterRPSPrediction(true); - pLocalRPS->setNumRefIdc(iNewIdc); - pLocalRPS->setDeltaRPS(deltaRPS); - pLocalRPS->setDeltaRIdxMinus1(pReferencePictureSet->getDeltaRIdxMinus1() + this->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - this->getRPSidx()); } - - this->setRPS(pLocalRPS); - this->setRPSidx(-1); } + //! 
get AC and DC values for weighted pred void Slice::getWpAcDcParam(const WPACDCParam *&wp) const { @@ -1690,41 +1493,221 @@ unsigned Slice::getMinPictureDistance() const { for (int refIdx = 0; refIdx < getNumRefIdx(REF_PIC_LIST_1); refIdx++) { - minPicDist = std::min( minPicDist, std::abs(currPOC - getRefPic(REF_PIC_LIST_0, refIdx)->getPOC())); + minPicDist = std::min(minPicDist, std::abs(currPOC - getRefPic(REF_PIC_LIST_1, refIdx)->getPOC())); } } } return (unsigned) minPicDist; } -#if HEVC_VPS // ------------------------------------------------------------------------------------------------ // Video parameter set (VPS) // ------------------------------------------------------------------------------------------------ VPS::VPS() -: m_VPSId ( 0) -, m_uiMaxTLayers ( 1) -, m_uiMaxLayers ( 1) -, m_bTemporalIdNestingFlag (false) -, m_numHrdParameters ( 0) -, m_maxNuhReservedZeroLayerId ( 0) -, m_hrdParameters () -, m_hrdOpSetIdx () -, m_cprmsPresentFlag () + : m_VPSId(0) + , m_uiMaxLayers(1) + , m_vpsMaxSubLayers(1) + , m_vpsAllLayersSameNumSubLayersFlag (true) + , m_vpsAllIndependentLayersFlag(true) + , m_vpsEachLayerIsAnOlsFlag (1) + , m_vpsOlsModeIdc (0) + , m_vpsNumOutputLayerSets (1) +, m_vpsExtensionFlag() { - - for( int i = 0; i < MAX_TLAYER; i++) + for (int i = 0; i < MAX_VPS_LAYERS; i++) { - m_numReorderPics[i] = 0; - m_uiMaxDecPicBuffering[i] = 1; - m_uiMaxLatencyIncrease[i] = 0; + m_vpsLayerId[i] = 0; + m_vpsIndependentLayerFlag[i] = 1; + for (int j = 0; j < MAX_VPS_LAYERS; j++) + { + m_vpsDirectRefLayerFlag[i][j] = 0; + m_directRefLayerIdx[i][j] = MAX_VPS_LAYERS; + m_interLayerRefIdx[i][i] = NOT_VALID; + } + } + for (int i = 0; i < MAX_NUM_OLSS; i++) + { + for (int j = 0; j < MAX_VPS_LAYERS; j++) + { + m_vpsOlsOutputLayerFlag[i][j] = 0; + } } } VPS::~VPS() { } -#endif + +// ------------------------------------------------------------------------------------------------ +// Picture Header +// 
------------------------------------------------------------------------------------------------ + +PicHeader::PicHeader() +: m_valid ( 0 ) +, m_nonReferencePictureFlag ( 0 ) +, m_gdrPicFlag ( 0 ) +, m_noOutputOfPriorPicsFlag ( 0 ) +, m_recoveryPocCnt ( 0 ) +, m_spsId ( -1 ) +, m_ppsId ( -1 ) +, m_subPicIdSignallingPresentFlag ( 0 ) +, m_subPicIdLen ( 0 ) +, m_loopFilterAcrossVirtualBoundariesDisabledFlag ( 0 ) +, m_numVerVirtualBoundaries ( 0 ) +, m_numHorVirtualBoundaries ( 0 ) +, m_colourPlaneId ( 0 ) +, m_picOutputFlag ( true ) +, m_picRplPresentFlag ( 0 ) +, m_pRPL0 ( 0 ) +, m_pRPL1 ( 0 ) +, m_rpl0Idx ( 0 ) +, m_rpl1Idx ( 0 ) +, m_splitConsOverrideFlag ( 0 ) +, m_cuQpDeltaSubdivIntra ( 0 ) +, m_cuQpDeltaSubdivInter ( 0 ) +, m_cuChromaQpOffsetSubdivIntra ( 0 ) +, m_cuChromaQpOffsetSubdivInter ( 0 ) +, m_enableTMVPFlag ( true ) +, m_mvdL1ZeroFlag ( 0 ) +, m_maxNumMergeCand ( MRG_MAX_NUM_CANDS ) +, m_maxNumAffineMergeCand ( AFFINE_MRG_MAX_NUM_CANDS ) +, m_disFracMMVD ( 0 ) +, m_disBdofFlag ( 0 ) +, m_disDmvrFlag ( 0 ) +, m_disProfFlag ( 0 ) +, m_maxNumTriangleCand ( 0 ) +, m_maxNumIBCMergeCand ( IBC_MRG_MAX_NUM_CANDS ) +, m_jointCbCrSignFlag ( 0 ) +, m_saoEnabledPresentFlag ( 0 ) +, m_alfEnabledPresentFlag ( 0 ) +, m_numAlfAps ( 0 ) +, m_alfApsId ( 0 ) +, m_alfChromaApsId ( 0 ) +, m_depQuantEnabledFlag ( 0 ) +, m_signDataHidingEnabledFlag ( 0 ) +, m_deblockingFilterOverridePresentFlag ( 0 ) +, m_deblockingFilterOverrideFlag ( 0 ) +, m_deblockingFilterDisable ( 0 ) +, m_deblockingFilterBetaOffsetDiv2 ( 0 ) +, m_deblockingFilterTcOffsetDiv2 ( 0 ) +, m_lmcsEnabledFlag ( 0 ) +, m_lmcsApsId ( -1 ) +, m_lmcsAps ( nullptr ) +, m_lmcsChromaResidualScaleFlag ( 0 ) +, m_scalingListPresentFlag ( 0 ) +, m_scalingListApsId ( -1 ) +, m_scalingListAps ( nullptr ) +{ + memset(m_subPicId, 0, sizeof(m_subPicId)); + memset(m_virtualBoundariesPosX, 0, sizeof(m_virtualBoundariesPosX)); + memset(m_virtualBoundariesPosY, 0, sizeof(m_virtualBoundariesPosY)); + memset(m_saoEnabledFlag, 
0, sizeof(m_saoEnabledFlag)); + memset(m_alfEnabledFlag, 0, sizeof(m_alfEnabledFlag)); + memset(m_minQT, 0, sizeof(m_minQT)); + memset(m_maxMTTHierarchyDepth, 0, sizeof(m_maxMTTHierarchyDepth)); + memset(m_maxBTSize, 0, sizeof(m_maxBTSize)); + memset(m_maxTTSize, 0, sizeof(m_maxTTSize)); + + m_localRPL0.setNumberOfActivePictures(0); + m_localRPL0.setNumberOfShorttermPictures(0); + m_localRPL0.setNumberOfLongtermPictures(0); + m_localRPL0.setLtrpInSliceHeaderFlag(0); + m_localRPL0.setNumberOfInterLayerPictures( 0 ); + + m_localRPL1.setNumberOfActivePictures(0); + m_localRPL1.setNumberOfShorttermPictures(0); + m_localRPL1.setNumberOfLongtermPictures(0); + m_localRPL1.setLtrpInSliceHeaderFlag(0); + m_localRPL1.setNumberOfInterLayerPictures( 0 ); + + m_alfApsId.resize(0); +} + +PicHeader::~PicHeader() +{ + m_alfApsId.resize(0); +} + +/** + - initialize picture header to defaut state + */ +void PicHeader::initPicHeader() +{ + m_valid = 0; + m_nonReferencePictureFlag = 0; + m_gdrPicFlag = 0; + m_noOutputOfPriorPicsFlag = 0; + m_recoveryPocCnt = 0; + m_spsId = -1; + m_ppsId = -1; + m_subPicIdSignallingPresentFlag = 0; + m_subPicIdLen = 0; + m_loopFilterAcrossVirtualBoundariesDisabledFlag = 0; + m_numVerVirtualBoundaries = 0; + m_numHorVirtualBoundaries = 0; + m_colourPlaneId = 0; + m_picOutputFlag = true; + m_picRplPresentFlag = 0; + m_pRPL0 = 0; + m_pRPL1 = 0; + m_rpl0Idx = 0; + m_rpl1Idx = 0; + m_splitConsOverrideFlag = 0; + m_cuQpDeltaSubdivIntra = 0; + m_cuQpDeltaSubdivInter = 0; + m_cuChromaQpOffsetSubdivIntra = 0; + m_cuChromaQpOffsetSubdivInter = 0; + m_enableTMVPFlag = true; + m_mvdL1ZeroFlag = 0; + m_maxNumMergeCand = MRG_MAX_NUM_CANDS; + m_maxNumAffineMergeCand = AFFINE_MRG_MAX_NUM_CANDS; + m_disFracMMVD = 0; + m_disBdofFlag = 0; + m_disDmvrFlag = 0; + m_disProfFlag = 0; + m_maxNumTriangleCand = 0; + m_maxNumIBCMergeCand = IBC_MRG_MAX_NUM_CANDS; + m_jointCbCrSignFlag = 0; + m_saoEnabledPresentFlag = 0; + m_alfEnabledPresentFlag = 0; + m_numAlfAps = 0; + 
m_alfChromaApsId = 0; + m_depQuantEnabledFlag = 0; + m_signDataHidingEnabledFlag = 0; + m_deblockingFilterOverridePresentFlag = 0; + m_deblockingFilterOverrideFlag = 0; + m_deblockingFilterDisable = 0; + m_deblockingFilterBetaOffsetDiv2 = 0; + m_deblockingFilterTcOffsetDiv2 = 0; + m_lmcsEnabledFlag = 0; + m_lmcsApsId = -1; + m_lmcsAps = nullptr; + m_lmcsChromaResidualScaleFlag = 0; + m_scalingListPresentFlag = 0; + m_scalingListApsId = -1; + m_scalingListAps = nullptr; + memset(m_subPicId, 0, sizeof(m_subPicId)); + memset(m_virtualBoundariesPosX, 0, sizeof(m_virtualBoundariesPosX)); + memset(m_virtualBoundariesPosY, 0, sizeof(m_virtualBoundariesPosY)); + memset(m_saoEnabledFlag, 0, sizeof(m_saoEnabledFlag)); + memset(m_alfEnabledFlag, 0, sizeof(m_alfEnabledFlag)); + memset(m_minQT, 0, sizeof(m_minQT)); + memset(m_maxMTTHierarchyDepth, 0, sizeof(m_maxMTTHierarchyDepth)); + memset(m_maxBTSize, 0, sizeof(m_maxBTSize)); + memset(m_maxTTSize, 0, sizeof(m_maxTTSize)); + + m_localRPL0.setNumberOfActivePictures(0); + m_localRPL0.setNumberOfShorttermPictures(0); + m_localRPL0.setNumberOfLongtermPictures(0); + m_localRPL0.setLtrpInSliceHeaderFlag(0); + + m_localRPL1.setNumberOfActivePictures(0); + m_localRPL1.setNumberOfShorttermPictures(0); + m_localRPL1.setNumberOfLongtermPictures(0); + m_localRPL1.setLtrpInSliceHeaderFlag(0); + + m_alfApsId.resize(0); +} // ------------------------------------------------------------------------------------------------ // Sequence parameter set (SPS) @@ -1748,90 +1731,80 @@ SPSRExt::SPSRExt() SPS::SPS() : m_SPSId ( 0) -#if !JVET_M0101_HLS -, m_bIntraOnlyConstraintFlag (false) -, m_maxBitDepthConstraintIdc ( 0) -, m_maxChromaFormatConstraintIdc(CHROMA_420) -, m_bFrameConstraintFlag (false) -, m_bNoQtbttDualTreeIntraConstraintFlag(false) -, m_bNoSaoConstraintFlag (false) -, m_bNoAlfConstraintFlag (false) -, m_bNoPcmConstraintFlag (false) -, m_bNoRefWraparoundConstraintFlag(false) -, m_bNoTemporalMvpConstraintFlag(false) -, 
m_bNoSbtmvpConstraintFlag (false) -, m_bNoAmvrConstraintFlag (false) -, m_bNoBdofConstraintFlag (false) -, m_bNoCclmConstraintFlag (false) -, m_bNoMtsConstraintFlag (false) -, m_bNoAffineMotionConstraintFlag(false) -, m_bNoGbiConstraintFlag (false) -, m_bNoMhIntraConstraintFlag (false) -, m_bNoTriangleConstraintFlag (false) -, m_bNoLadfConstraintFlag (false) -, m_bNoCurrPicRefConstraintFlag(false) -, m_bNoQpDeltaConstraintFlag (false) -, m_bNoDepQuantConstraintFlag (false) -, m_bNoSignDataHidingConstraintFlag(false) -#endif +, m_decodingParameterSetId ( 0 ) +, m_VPSId ( 0 ) , m_affineAmvrEnabledFlag ( false ) , m_DMVR ( false ) +, m_MMVD ( false ) , m_SBT ( false ) -, m_MaxSbtSize ( 32 ) -#if HEVC_VPS -, m_VPSId ( 0) -#endif +, m_ISP ( false ) , m_chromaFormatIdc (CHROMA_420) +, m_separateColourPlaneFlag(0) , m_uiMaxTLayers ( 1) // Structure -, m_picWidthInLumaSamples (352) -, m_picHeightInLumaSamples (288) +, m_maxWidthInLumaSamples (352) +, m_maxHeightInLumaSamples (288) +, m_subPicPresentFlag (0) +, m_numSubPics(1) +, m_subPicIdPresentFlag(0) +, m_subPicIdSignallingPresentFlag(0) +, m_subPicIdLen(16) , m_log2MinCodingBlockSize ( 0) , m_log2DiffMaxMinCodingBlockSize(0) , m_CTUSize(0) , m_minQT{ 0, 0, 0 } -, m_maxBTDepth{ MAX_BT_DEPTH, MAX_BT_DEPTH_INTER, MAX_BT_DEPTH_C } +, m_maxMTTHierarchyDepth{ MAX_BT_DEPTH, MAX_BT_DEPTH_INTER, MAX_BT_DEPTH_C } , m_maxBTSize{ MAX_BT_SIZE, MAX_BT_SIZE_INTER, MAX_BT_SIZE_C } , m_maxTTSize{ MAX_TT_SIZE, MAX_TT_SIZE_INTER, MAX_TT_SIZE_C } , m_uiMaxCUWidth ( 32) , m_uiMaxCUHeight ( 32) , m_uiMaxCodingDepth ( 3) +, m_numRPL0 ( 0 ) +, m_numRPL1 ( 0 ) +, m_rpl1CopyFromRpl0Flag ( false ) +, m_rpl1IdxPresentFlag ( false ) +, m_allRplEntriesHasSameSignFlag ( true ) , m_bLongTermRefsPresent (false) // Tool list -, m_pcmEnabledFlag (false) -, m_pcmLog2MaxSize ( 5) -, m_uiPCMLog2MinSize ( 7) -, m_bPCMFilterDisableFlag (false) +, m_transformSkipEnabledFlag (false) +, m_BDPCMEnabled (0) +, m_JointCbCrEnabledFlag (false) , m_sbtmvpEnabledFlag 
(false) , m_bdofEnabledFlag (false) -, m_disFracMmvdEnabledFlag ( false ) +, m_fpelMmvdEnabledFlag ( false ) +, m_BdofControlPresentFlag ( false ) +, m_DmvrControlPresentFlag ( false ) +, m_ProfControlPresentFlag ( false ) , m_uiBitsForPOC ( 8) , m_numLongTermRefPicSPS ( 0) -#if MAX_TB_SIZE_SIGNALLING , m_log2MaxTbSize ( 6) -#endif +, m_useWeightPred (false) +, m_useWeightedBiPred (false) , m_saoEnabledFlag (false) , m_bTemporalIdNestingFlag (false) -#if HEVC_USE_SCALING_LISTS , m_scalingListEnabledFlag (false) -#endif -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 -, m_useStrongIntraSmoothing (false) -#endif +, m_loopFilterAcrossVirtualBoundariesDisabledFlag(0) +, m_numVerVirtualBoundaries(0) +, m_numHorVirtualBoundaries(0) +, m_hrdParametersPresentFlag (false) , m_vuiParametersPresentFlag (false) , m_vuiParameters () , m_wrapAroundEnabledFlag (false) , m_wrapAroundOffset ( 0) , m_IBCFlag ( 0) -, m_lumaReshapeEnable (false) +, m_PLTMode ( 0) +, m_lmcsEnabled (false) , m_AMVREnabledFlag ( false ) , m_LMChroma ( false ) -, m_cclmCollocatedChromaFlag ( false ) +, m_horCollocatedChromaFlag ( true ) +, m_verCollocatedChromaFlag ( false ) , m_IntraMTS ( false ) , m_InterMTS ( false ) +, m_LFNST ( false ) , m_Affine ( false ) , m_AffineType ( false ) -, m_MHIntra ( false ) +, m_PROF ( false ) +, m_ciip ( false ) , m_Triangle ( false ) #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET , m_LadfEnabled ( false ) @@ -1839,11 +1812,16 @@ SPS::SPS() , m_LadfQpOffset { 0 } , m_LadfIntervalLowerBound { 0 } #endif +, m_MRL ( false ) +, m_MIP ( false ) +, m_GDREnabledFlag ( true ) +, m_SubLayerCbpParametersPresentFlag ( true ) +, m_rprEnabledFlag ( false ) + { for(int ch=0; ch<MAX_NUM_CHANNEL_TYPE; ch++) { m_bitDepths.recon[ch] = 8; - m_pcmBitDepths[ch] = 8; m_qpBDOffset [ch] = 0; } @@ -1856,34 +1834,122 @@ SPS::SPS() ::memset(m_ltRefPicPocLsbSps, 0, sizeof(m_ltRefPicPocLsbSps)); ::memset(m_usedByCurrPicLtSPSFlag, 0, sizeof(m_usedByCurrPicLtSPSFlag)); + 
::memset(m_virtualBoundariesPosX, 0, sizeof(m_virtualBoundariesPosX)); + ::memset(m_virtualBoundariesPosY, 0, sizeof(m_virtualBoundariesPosY)); } SPS::~SPS() { - m_RPSList.destroy(); } -void SPS::createRPSList( int numRPS ) +void SPS::createRPLList0(int numRPL) { - m_RPSList.destroy(); - m_RPSList.create(numRPS); + m_RPLList0.destroy(); + m_RPLList0.create(numRPL); + m_numRPL0 = numRPL; + m_rpl1IdxPresentFlag = (m_numRPL0 != m_numRPL1) ? true : false; } +void SPS::createRPLList1(int numRPL) +{ + m_RPLList1.destroy(); + m_RPLList1.create(numRPL); + m_numRPL1 = numRPL; + m_rpl1IdxPresentFlag = (m_numRPL0 != m_numRPL1) ? true : false; +} const int SPS::m_winUnitX[]={1,2,2,1}; const int SPS::m_winUnitY[]={1,2,1,1}; +void ChromaQpMappingTable::setParams(const ChromaQpMappingTableParams ¶ms, const int qpBdOffset) +{ + m_qpBdOffset = qpBdOffset; + m_sameCQPTableForAllChromaFlag = params.m_sameCQPTableForAllChromaFlag; + m_numQpTables = params.m_numQpTables; + + for (int i = 0; i < MAX_NUM_CQP_MAPPING_TABLES; i++) + { + m_numPtsInCQPTableMinus1[i] = params.m_numPtsInCQPTableMinus1[i]; + m_deltaQpInValMinus1[i] = params.m_deltaQpInValMinus1[i]; + m_qpTableStartMinus26[i] = params.m_qpTableStartMinus26[i]; + m_deltaQpOutVal[i] = params.m_deltaQpOutVal[i]; + } +} +void ChromaQpMappingTable::derivedChromaQPMappingTables() +{ + for (int i = 0; i < getNumQpTables(); i++) + { + const int qpBdOffsetC = m_qpBdOffset; + const int numPtsInCQPTableMinus1 = getNumPtsInCQPTableMinus1(i); + std::vector<int> qpInVal(numPtsInCQPTableMinus1 + 2), qpOutVal(numPtsInCQPTableMinus1 + 2); + + qpInVal[0] = getQpTableStartMinus26(i) + 26; + qpOutVal[0] = qpInVal[0]; + for (int j = 0; j <= getNumPtsInCQPTableMinus1(i); j++) + { + qpInVal[j + 1] = qpInVal[j] + getDeltaQpInValMinus1(i, j) + 1; + qpOutVal[j + 1] = qpOutVal[j] + getDeltaQpOutVal(i, j); + } + + for (int j = 0; j <= getNumPtsInCQPTableMinus1(i); j++) + { + CHECK(qpInVal[j] < -qpBdOffsetC || qpInVal[j] > MAX_QP, "qpInVal out of range"); + 
CHECK(qpOutVal[j] < -qpBdOffsetC || qpOutVal[j] > MAX_QP, "qpOutVal out of range"); + } + + m_chromaQpMappingTables[i][qpInVal[0]] = qpOutVal[0]; + for (int k = qpInVal[0] - 1; k >= -qpBdOffsetC; k--) + { + m_chromaQpMappingTables[i][k] = Clip3(-qpBdOffsetC, MAX_QP, m_chromaQpMappingTables[i][k + 1] - 1); + } + for (int j = 0; j <= numPtsInCQPTableMinus1; j++) + { + int sh = (getDeltaQpInValMinus1(i, j) + 1) >> 1; + for (int k = qpInVal[j] + 1, m = 1; k <= qpInVal[j + 1]; k++, m++) + { + m_chromaQpMappingTables[i][k] = m_chromaQpMappingTables[i][qpInVal[j]] + + ((qpOutVal[j + 1] - qpOutVal[j]) * m + sh) / (getDeltaQpInValMinus1(i, j) + 1); + } + } + for (int k = qpInVal[numPtsInCQPTableMinus1 + 1] + 1; k <= MAX_QP; k++) + { + m_chromaQpMappingTables[i][k] = Clip3(-qpBdOffsetC, MAX_QP, m_chromaQpMappingTables[i][k - 1] + 1); + } + } +} + +SliceMap::SliceMap() +: m_sliceID (0) +, m_numTilesInSlice (0) +, m_numCtuInSlice (0) +{ + m_ctuAddrInSlice.clear(); +} + +SliceMap::~SliceMap() +{ + m_numCtuInSlice = 0; + m_ctuAddrInSlice.clear(); +} + +RectSlice::RectSlice() +: m_tileIdx (0) +, m_sliceWidthInTiles (0) +, m_sliceHeightInTiles (0) +, m_numSlicesInTile (0) +, m_sliceHeightInCtu (0) +{ +} + +RectSlice::~RectSlice() +{ +} + PPSRExt::PPSRExt() -: m_log2MaxTransformSkipBlockSize (2) -, m_crossComponentPredictionEnabledFlag(false) -, m_cuChromaQpOffsetSubdiv (0) -, m_chromaQpOffsetListLen (0) -// m_ChromaQpAdjTableIncludingNullEntry initialized below +: m_crossComponentPredictionEnabledFlag(false) // m_log2SaoOffsetScale initialized below { - m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CbOffset = 0; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0. This is initialised here and never subsequently changed. 
- m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CrOffset = 0; for(int ch=0; ch<MAX_NUM_CHANNEL_TYPE; ch++) { m_log2SaoOffsetScale[ch] = 0; @@ -1895,215 +1961,447 @@ PPS::PPS() , m_SPSId (0) , m_picInitQPMinus26 (0) , m_useDQP (false) -, m_bConstrainedIntraPred (false) , m_bSliceChromaQpFlag (false) -, m_cuQpDeltaSubdiv (0) , m_chromaCbQpOffset (0) , m_chromaCrQpOffset (0) +, m_chromaCbCrQpOffset (0) +, m_chromaQpOffsetListLen (0) , m_numRefIdxL0DefaultActive (1) , m_numRefIdxL1DefaultActive (1) -, m_TransquantBypassEnabledFlag (false) -, m_useTransformSkip (false) -#if HEVC_DEPENDENT_SLICES -, m_dependentSliceSegmentsEnabledFlag(false) -#endif -#if HEVC_TILES_WPP -, m_tilesEnabledFlag (false) +, m_rpl1IdxPresentFlag (false) +, m_numSubPics (1) +, m_subPicIdSignallingPresentFlag (0) +, m_subPicIdLen (16) +, m_noPicPartitionFlag (1) +, m_log2CtuSize (0) +, m_ctuSize (0) +, m_picWidthInCtu (0) +, m_picHeightInCtu (0) +, m_numTileCols (1) +, m_numTileRows (1) +, m_rectSliceFlag (1) + , m_singleSlicePerSubPicFlag (0) +, m_numSlicesInPic (1) +, m_tileIdxDeltaPresentFlag (0) +, m_loopFilterAcrossTilesEnabledFlag (1) +, m_loopFilterAcrossSlicesEnabledFlag(0) +, m_log2MaxTransformSkipBlockSize (2) , m_entropyCodingSyncEnabledFlag (false) -, m_loopFilterAcrossTilesEnabledFlag (true) -, m_uniformSpacingFlag (false) -, m_numTileColumnsMinus1 (0) -, m_numTileRowsMinus1 (0) -#endif +, m_constantSliceHeaderParamsEnabledFlag (false) +, m_PPSDepQuantEnabledIdc (0) +, m_PPSRefPicListSPSIdc0 (0) +, m_PPSRefPicListSPSIdc1 (0) +, m_PPSMvdL1ZeroIdc (0) +, m_PPSCollocatedFromL0Idc (0) +, m_PPSSixMinusMaxNumMergeCandPlus1 (0) +, m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 (0) , m_cabacInitPresentFlag (false) +, m_pictureHeaderExtensionPresentFlag(0) , m_sliceHeaderExtensionPresentFlag (false) -, m_loopFilterAcrossSlicesEnabledFlag(false) , m_listsModificationPresentFlag (0) -, m_numExtraSliceHeaderBits (0) +, m_picWidthInLumaSamples( 352 ) +, m_picHeightInLumaSamples( 288 ) , 
m_ppsRangeExtension () , pcv (NULL) { + m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CbOffset = 0; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0. This is initialised here and never subsequently changed. + m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CrOffset = 0; + m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.JointCbCrOffset = 0; + m_tileColWidth.clear(); + m_tileRowHeight.clear(); + m_tileColBd.clear(); + m_tileRowBd.clear(); + m_ctuToTileCol.clear(); + m_ctuToTileRow.clear(); + m_ctuToSubPicIdx.clear(); + m_rectSlices.clear(); + m_sliceMap.clear(); +} + +PPS::~PPS() +{ + m_tileColWidth.clear(); + m_tileRowHeight.clear(); + m_tileColBd.clear(); + m_tileRowBd.clear(); + m_ctuToTileCol.clear(); + m_ctuToTileRow.clear(); + m_ctuToSubPicIdx.clear(); + m_rectSlices.clear(); + m_sliceMap.clear(); + + delete pcv; +} + +/** + - reset tile and slice parameters and lists + */ +void PPS::resetTileSliceInfo() +{ + m_numExpTileCols = 0; + m_numExpTileRows = 0; + m_numTileCols = 0; + m_numTileRows = 0; + m_numSlicesInPic = 0; + m_tileColWidth.clear(); + m_tileRowHeight.clear(); + m_tileColBd.clear(); + m_tileRowBd.clear(); + m_ctuToTileCol.clear(); + m_ctuToTileRow.clear(); + m_ctuToSubPicIdx.clear(); + m_rectSlices.clear(); + m_sliceMap.clear(); +} + +/** + - initialize tile row/column sizes and boundaries + */ +void PPS::initTiles() +{ + int colIdx, rowIdx; + int ctuX, ctuY; + + // check explicit tile column sizes + uint32_t remainingWidthInCtu = m_picWidthInCtu; + for( colIdx = 0; colIdx < m_numExpTileCols; colIdx++ ) + { + CHECK(m_tileColWidth[colIdx] > remainingWidthInCtu, "Tile column width exceeds picture width"); + remainingWidthInCtu -= m_tileColWidth[colIdx]; + } + + // divide remaining picture width into uniform tile columns + uint32_t uniformTileColWidth = m_tileColWidth[colIdx-1]; + while( remainingWidthInCtu > 0 ) + { + CHECK(colIdx >= MAX_TILE_COLS, "Number of tile columns exceeds valid range"); + uniformTileColWidth = 
std::min(remainingWidthInCtu, uniformTileColWidth); + m_tileColWidth.push_back( uniformTileColWidth ); + remainingWidthInCtu -= uniformTileColWidth; + colIdx++; + } + m_numTileCols = colIdx; + + // check explicit tile row sizes + uint32_t remainingHeightInCtu = m_picHeightInCtu; + for( rowIdx = 0; rowIdx < m_numExpTileRows; rowIdx++ ) + { + CHECK(m_tileRowHeight[rowIdx] > remainingHeightInCtu, "Tile row height exceeds picture height"); + remainingHeightInCtu -= m_tileRowHeight[rowIdx]; + } + + // divide remaining picture height into uniform tile rows + uint32_t uniformTileRowHeight = m_tileRowHeight[rowIdx - 1]; + while( remainingHeightInCtu > 0 ) + { + CHECK(rowIdx >= MAX_TILE_ROWS, "Number of tile rows exceeds valid range"); + uniformTileRowHeight = std::min(remainingHeightInCtu, uniformTileRowHeight); + m_tileRowHeight.push_back( uniformTileRowHeight ); + remainingHeightInCtu -= uniformTileRowHeight; + rowIdx++; + } + m_numTileRows = rowIdx; + + // set left column bounaries + m_tileColBd.push_back( 0 ); + for( colIdx = 0; colIdx < m_numTileCols; colIdx++ ) + { + m_tileColBd.push_back( m_tileColBd[ colIdx ] + m_tileColWidth[ colIdx ] ); + } + + // set top row bounaries + m_tileRowBd.push_back( 0 ); + for( rowIdx = 0; rowIdx < m_numTileRows; rowIdx++ ) + { + m_tileRowBd.push_back( m_tileRowBd[ rowIdx ] + m_tileRowHeight[ rowIdx ] ); + } + + // set mapping between horizontal CTU address and tile column index + colIdx = 0; + for( ctuX = 0; ctuX <= m_picWidthInCtu; ctuX++ ) + { + if( ctuX == m_tileColBd[ colIdx + 1 ] ) + { + colIdx++; + } + m_ctuToTileCol.push_back( colIdx ); + } + + // set mapping between vertical CTU address and tile row index + rowIdx = 0; + for( ctuY = 0; ctuY <= m_picHeightInCtu; ctuY++ ) + { + if( ctuY == m_tileRowBd[ rowIdx + 1 ] ) + { + rowIdx++; + } + m_ctuToTileRow.push_back( rowIdx ); + } +} + +/** + - initialize memory for rectangular slice parameters + */ +void PPS::initRectSlices() +{ + CHECK(m_numSlicesInPic > MAX_SLICES, "Number of 
slices in picture exceeds valid range"); + m_rectSlices.resize(m_numSlicesInPic); +} + +/** + - initialize mapping between rectangular slices and CTUs + */ +void PPS::initRectSliceMap() +{ + uint32_t ctuY; + uint32_t tileX, tileY; + + // allocate new memory for slice list + CHECK(m_numSlicesInPic > MAX_SLICES, "Number of slices in picture exceeds valid range"); + m_sliceMap.resize( m_numSlicesInPic ); + if ((getNumSubPics() > 0) && getSingleSlicePerSubPicFlag()) + { + for (uint32_t i = 0; i <= getNumSubPics() - 1; i++) + { + m_sliceMap[i].initSliceMap(); + } + uint32_t picSizeInCtu = getPicWidthInCtu() * getPicHeightInCtu(); + uint32_t sliceIdx; + for (uint32_t i = 0; i < picSizeInCtu; i++) + { + sliceIdx = getCtuToSubPicIdx(i); + m_sliceMap[sliceIdx].pushToCtuAddrInSlice(i); + } + } + else + { + // generate CTU maps for all rectangular slices in picture + for( uint32_t i = 0; i < m_numSlicesInPic; i++ ) + { + m_sliceMap[ i ].initSliceMap(); + + // get position of first tile in slice + tileX = m_rectSlices[ i ].getTileIdx() % m_numTileCols; + tileY = m_rectSlices[ i ].getTileIdx() / m_numTileCols; + + // infer slice size for last slice in picture + if( i == m_numSlicesInPic-1 ) + { + m_rectSlices[ i ].setSliceWidthInTiles ( m_numTileCols - tileX ); + m_rectSlices[ i ].setSliceHeightInTiles( m_numTileRows - tileY ); + m_rectSlices[ i ].setNumSlicesInTile( 1 ); + } + + // set slice index + m_sliceMap[ i ].setSliceID(i); + + // complete tiles within a single slice case + if( m_rectSlices[ i ].getSliceWidthInTiles( ) > 1 || m_rectSlices[ i ].getSliceHeightInTiles( ) > 1) + { + for( uint32_t j = 0; j < m_rectSlices[ i ].getSliceHeightInTiles( ); j++ ) + { + for( uint32_t k = 0; k < m_rectSlices[ i ].getSliceWidthInTiles( ); k++ ) + { + m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX + k), getTileColumnBd(tileX + k +1), + getTileRowBd(tileY + j), getTileRowBd(tileY + j +1), m_picWidthInCtu); + } + } + } + // multiple slices within a single tile case + else + { + 
uint32_t numSlicesInTile = m_rectSlices[ i ].getNumSlicesInTile( ); + + ctuY = getTileRowBd( tileY ); + for( uint32_t j = 0; j < numSlicesInTile-1; j++ ) + { + m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX), getTileColumnBd(tileX+1), + ctuY, ctuY + m_rectSlices[ i ].getSliceHeightInCtu(), m_picWidthInCtu); + ctuY += m_rectSlices[ i ].getSliceHeightInCtu(); + i++; + m_sliceMap[ i ].setSliceID(i); + } + + // infer slice height for last slice in tile + CHECK( ctuY >= getTileRowBd( tileY + 1 ), "Invalid rectangular slice signalling"); + m_rectSlices[ i ].setSliceHeightInCtu( getTileRowBd( tileY + 1 ) - ctuY ); + m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX), getTileColumnBd(tileX+1), + ctuY, getTileRowBd( tileY + 1 ), m_picWidthInCtu); + } + } + } + // check for valid rectangular slice map + checkSliceMap(); +} + +void PPS::initRasterSliceMap( std::vector<uint32_t> numTilesInSlice ) +{ + uint32_t tileIdx = 0; + setNumSlicesInPic( (uint32_t) numTilesInSlice.size() ); + + // allocate new memory for slice list + CHECK(m_numSlicesInPic > MAX_SLICES, "Number of slices in picture exceeds valid range"); + m_sliceMap.resize( m_numSlicesInPic ); + + for( uint32_t sliceIdx = 0; sliceIdx < numTilesInSlice.size(); sliceIdx++ ) + { + m_sliceMap[sliceIdx].initSliceMap(); + m_sliceMap[sliceIdx].setSliceID( tileIdx ); + m_sliceMap[sliceIdx].setNumTilesInSlice( numTilesInSlice[sliceIdx] ); + for( uint32_t idx = 0; idx < numTilesInSlice[sliceIdx]; idx++ ) + { + uint32_t tileX = tileIdx % getNumTileColumns(); + uint32_t tileY = tileIdx / getNumTileColumns(); + CHECK(tileY >= getNumTileRows(), "Number of tiles in slice exceeds the remaining number of tiles in picture"); + + m_sliceMap[sliceIdx].addCtusToSlice(getTileColumnBd(tileX), getTileColumnBd(tileX + 1), + getTileRowBd(tileY), getTileRowBd(tileY + 1), + getPicWidthInCtu()); + tileIdx++; + } + } + + // check for valid raster-scan slice map + checkSliceMap(); } -PPS::~PPS() +/** + - check if slice map covers the 
entire picture without skipping or duplicating any CTU positions + */ +void PPS::checkSliceMap() { - delete pcv; + uint32_t i; + std::vector<uint32_t> ctuList, sliceList; + uint32_t picSizeInCtu = getPicWidthInCtu() * getPicHeightInCtu(); + for( i = 0; i < m_numSlicesInPic; i++ ) + { + sliceList = m_sliceMap[ i ].getCtuAddrList(); + ctuList.insert( ctuList.end(), sliceList.begin(), sliceList.end() ); + } + CHECK( ctuList.size() < picSizeInCtu, "Slice map contains too few CTUs"); + CHECK( ctuList.size() > picSizeInCtu, "Slice map contains too many CTUs"); + std::sort( ctuList.begin(), ctuList.end() ); + for( i = 1; i < ctuList.size(); i++ ) + { + CHECK( ctuList[i] > ctuList[i-1]+1, "CTU missing in slice map"); + CHECK( ctuList[i] == ctuList[i-1], "CTU duplicated in slice map"); + } } APS::APS() : m_APSId(0) +, m_temporalId( 0 ) +, m_layerId( 0 ) { } APS::~APS() { } -ReferencePictureSet::ReferencePictureSet() -: m_numberOfPictures (0) -, m_numberOfNegativePictures (0) -, m_numberOfPositivePictures (0) -, m_numberOfLongtermPictures (0) -, m_interRPSPrediction (0) -, m_deltaRIdxMinus1 (0) -, m_deltaRPS (0) -, m_numRefIdc (0) -{ - ::memset( m_deltaPOC, 0, sizeof(m_deltaPOC) ); - ::memset( m_POC, 0, sizeof(m_POC) ); - ::memset( m_used, 0, sizeof(m_used) ); - ::memset( m_refIdc, 0, sizeof(m_refIdc) ); - ::memset( m_bCheckLTMSB, 0, sizeof(m_bCheckLTMSB) ); - ::memset( m_pocLSBLT, 0, sizeof(m_pocLSBLT) ); - ::memset( m_deltaPOCMSBCycleLT, 0, sizeof(m_deltaPOCMSBCycleLT) ); - ::memset( m_deltaPocMSBPresentFlag, 0, sizeof(m_deltaPocMSBPresentFlag) ); -} -ReferencePictureSet::~ReferencePictureSet() +ReferencePictureList::ReferencePictureList( const bool interLayerPicPresentFlag ) + : m_numberOfShorttermPictures(0) + , m_numberOfLongtermPictures(0) + , m_numberOfActivePictures(MAX_INT) + , m_ltrp_in_slice_header_flag(0) + , m_interLayerPresentFlag( interLayerPicPresentFlag ) + , m_numberOfInterLayerPictures( 0 ) { + ::memset(m_isLongtermRefPic, 0, sizeof(m_isLongtermRefPic)); + 
::memset(m_refPicIdentifier, 0, sizeof(m_refPicIdentifier)); + ::memset(m_POC, 0, sizeof(m_POC)); + ::memset( m_isInterLayerRefPic, 0, sizeof( m_isInterLayerRefPic ) ); + ::memset( m_interLayerRefPicIdx, 0, sizeof( m_interLayerRefPicIdx ) ); } -void ReferencePictureSet::setUsed(int bufferNum, bool used) +ReferencePictureList::~ReferencePictureList() { - m_used[bufferNum] = used; } -void ReferencePictureSet::setDeltaPOC(int bufferNum, int deltaPOC) +void ReferencePictureList::setRefPicIdentifier( int idx, int identifier, bool isLongterm, bool isInterLayerRefPic, int interLayerIdx ) { - m_deltaPOC[bufferNum] = deltaPOC; -} + m_refPicIdentifier[idx] = identifier; + m_isLongtermRefPic[idx] = isLongterm; -void ReferencePictureSet::setNumberOfPictures(int numberOfPictures) -{ - m_numberOfPictures = numberOfPictures; -} + m_deltaPocMSBPresentFlag[idx] = false; + m_deltaPOCMSBCycleLT[idx] = 0; -int ReferencePictureSet::getUsed(int bufferNum) const -{ - return m_used[bufferNum]; + m_isInterLayerRefPic[idx] = isInterLayerRefPic; + m_interLayerRefPicIdx[idx] = interLayerIdx; } -int ReferencePictureSet::getDeltaPOC(int bufferNum) const +int ReferencePictureList::getRefPicIdentifier(int idx) const { - return m_deltaPOC[bufferNum]; + return m_refPicIdentifier[idx]; } -int ReferencePictureSet::getNumberOfPictures() const -{ - return m_numberOfPictures; -} -int ReferencePictureSet::getPOC(int bufferNum) const +bool ReferencePictureList::isRefPicLongterm(int idx) const { - return m_POC[bufferNum]; + return m_isLongtermRefPic[idx]; } -void ReferencePictureSet::setPOC(int bufferNum, int POC) +void ReferencePictureList::setNumberOfShorttermPictures(int numberOfStrp) { - m_POC[bufferNum] = POC; + m_numberOfShorttermPictures = numberOfStrp; } -bool ReferencePictureSet::getCheckLTMSBPresent(int bufferNum) const +int ReferencePictureList::getNumberOfShorttermPictures() const { - return m_bCheckLTMSB[bufferNum]; + return m_numberOfShorttermPictures; } -void 
ReferencePictureSet::setCheckLTMSBPresent(int bufferNum, bool b) +void ReferencePictureList::setNumberOfLongtermPictures(int numberOfLtrp) { - m_bCheckLTMSB[bufferNum] = b; + m_numberOfLongtermPictures = numberOfLtrp; } -//! set the reference idc value at uiBufferNum entry to the value of iRefIdc -void ReferencePictureSet::setRefIdc(int bufferNum, int refIdc) +int ReferencePictureList::getNumberOfLongtermPictures() const { - m_refIdc[bufferNum] = refIdc; + return m_numberOfLongtermPictures; } -//! get the reference idc value at uiBufferNum -int ReferencePictureSet::getRefIdc(int bufferNum) const +void ReferencePictureList::setPOC(int idx, int POC) { - return m_refIdc[bufferNum]; + m_POC[idx] = POC; } -/** Sorts the deltaPOC and Used by current values in the RPS based on the deltaPOC values. - * deltaPOC values are sorted with -ve values before the +ve values. -ve values are in decreasing order. - * +ve values are in increasing order. - * \returns void - */ -void ReferencePictureSet::sortDeltaPOC() +int ReferencePictureList::getPOC(int idx) const { - // sort in increasing order (smallest first) - for(int j=1; j < getNumberOfPictures(); j++) - { - int deltaPOC = getDeltaPOC(j); - bool used = getUsed(j); - for (int k=j-1; k >= 0; k--) - { - int temp = getDeltaPOC(k); - if (deltaPOC < temp) - { - setDeltaPOC(k+1, temp); - setUsed(k+1, getUsed(k)); - setDeltaPOC(k, deltaPOC); - setUsed(k, used); - } - } - } - // flip the negative values to largest first - int numNegPics = getNumberOfNegativePictures(); - for(int j=0, k=numNegPics-1; j < numNegPics>>1; j++, k--) - { - int deltaPOC = getDeltaPOC(j); - bool used = getUsed(j); - setDeltaPOC(j, getDeltaPOC(k)); - setUsed(j, getUsed(k)); - setDeltaPOC(k, deltaPOC); - setUsed(k, used); - } + return m_POC[idx]; } -/** Prints the deltaPOC and RefIdc (if available) values in the RPS. - * A "*" is added to the deltaPOC value if it is Used bu current. 
- * \returns void - */ -void ReferencePictureSet::printDeltaPOC() const +void ReferencePictureList::setNumberOfActivePictures(int numberActive) { - DTRACE( g_trace_ctx, D_RPSINFO, "DeltaPOC = { " ); - for(int j=0; j < getNumberOfPictures(); j++) - { - DTRACE( g_trace_ctx, D_RPSINFO, "%d%s ", getDeltaPOC( j ), ( getUsed( j ) == 1 ) ? "*" : "" ); - } - if (getInterRPSPrediction()) - { - DTRACE( g_trace_ctx, D_RPSINFO, "}, RefIdc = { " ); - for(int j=0; j < getNumRefIdc(); j++) - { - DTRACE( g_trace_ctx, D_RPSINFO, "%d ", getRefIdc( j ) ); - } - } - DTRACE( g_trace_ctx, D_RPSINFO, "}\n" ); + m_numberOfActivePictures = numberActive; } -RefPicListModification::RefPicListModification() -: m_refPicListModificationFlagL0 (false) -, m_refPicListModificationFlagL1 (false) +int ReferencePictureList::getNumberOfActivePictures() const { - ::memset( m_RefPicSetIdxL0, 0, sizeof(m_RefPicSetIdxL0) ); - ::memset( m_RefPicSetIdxL1, 0, sizeof(m_RefPicSetIdxL1) ); + return m_numberOfActivePictures; } -RefPicListModification::~RefPicListModification() +void ReferencePictureList::printRefPicInfo() const { + //DTRACE(g_trace_ctx, D_RPSINFO, "RefPics = { "); + printf("RefPics = { "); + int numRefPic = getNumberOfShorttermPictures() + getNumberOfLongtermPictures(); + for (int ii = 0; ii < numRefPic; ii++) + { + //DTRACE(g_trace_ctx, D_RPSINFO, "%d%s ", m_refPicIdentifier[ii], (m_isLongtermRefPic[ii] == 1) ? "[LT]" : "[ST]"); + printf("%d%s ", m_refPicIdentifier[ii], (m_isLongtermRefPic[ii] == 1) ? 
"[LT]" : "[ST]"); + } + //DTRACE(g_trace_ctx, D_RPSINFO, "}\n"); + printf("}\n"); } -#if HEVC_USE_SCALING_LISTS ScalingList::ScalingList() { - for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) + m_disableScalingMatrixForLfnstBlks = true; + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) - { - m_scalingListCoef[sizeId][listId].resize(std::min<int>(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])); - } + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + m_scalingListCoef[scalingListId].resize(matrixSize*matrixSize); } } @@ -2111,34 +2409,40 @@ ScalingList::ScalingList() */ void ScalingList::setDefaultScalingList() { - for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - for(uint32_t listId=0;listId<SCALING_LIST_NUM;listId++) - { - processDefaultMatrix(sizeId, listId); - } + processDefaultMatrix(scalingListId); } } /** check if use default quantization matrix - * \returns true if use default quantization matrix in all size + * \returns true if the scaling list is not equal to the default quantization matrix */ -bool ScalingList::checkDefaultScalingList() +bool ScalingList::isNotDefaultScalingList() { - uint32_t defaultCounter=0; - - for( uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++ ) + bool isAllDefault = true; + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - for(uint32_t listId=0;listId<SCALING_LIST_NUM;listId++) + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 
4 : 8; + if (scalingListId < SCALING_LIST_1D_START_16x16) + { + if (::memcmp(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int) * matrixSize * matrixSize)) + { + isAllDefault = false; + break; + } + } + else { - if( !::memcmp(getScalingListAddress(sizeId,listId), getScalingListDefaultAddress(sizeId, listId),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])) // check value of matrix - && ((sizeId < SCALING_LIST_16x16) || (getScalingListDC(sizeId,listId) == 16))) // check DC value + if ((::memcmp(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int) * MAX_MATRIX_COEF_NUM)) || (getScalingListDC(scalingListId) != 16)) { - defaultCounter++; + isAllDefault = false; + break; } } + if (!isAllDefault) break; } - return (defaultCounter == (SCALING_LIST_NUM * SCALING_LIST_SIZE_NUM )) ? false : true; + return !isAllDefault; } /** get scaling matrix from RefMatrixID @@ -2146,27 +2450,167 @@ bool ScalingList::checkDefaultScalingList() * \param listId index of input matrix * \param refListId index of reference matrix */ -void ScalingList::processRefMatrix( uint32_t sizeId, uint32_t listId , uint32_t refListId ) +int ScalingList::lengthUvlc(int uiCode) { - ::memcpy(getScalingListAddress(sizeId, listId),((listId == refListId)? getScalingListDefaultAddress(sizeId, refListId): getScalingListAddress(sizeId, refListId)),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])); + if (uiCode < 0) printf("Error UVLC! \n"); + + int uiLength = 1; + int uiTemp = ++uiCode; + + CHECK(!uiTemp, "Integer overflow"); + + while (1 != uiTemp) + { + uiTemp >>= 1; + uiLength += 2; + } + return (uiLength >> 1) + ((uiLength + 1) >> 1); } +int ScalingList::lengthSvlc(int uiCode) +{ + uint32_t uiCode2 = uint32_t(uiCode <= 0 ? 
(-uiCode) << 1 : (uiCode << 1) - 1); + int uiLength = 1; + int uiTemp = ++uiCode2; -void ScalingList::checkPredMode(uint32_t sizeId, uint32_t listId) + CHECK(!uiTemp, "Integer overflow"); + + while (1 != uiTemp) + { + uiTemp >>= 1; + uiLength += 2; + } + return (uiLength >> 1) + ((uiLength + 1) >> 1); +} +void ScalingList::codePredScalingList(int* scalingList, const int* scalingListPred, int scalingListDC, int scalingListPredDC, int scalingListId, int& bitsCost) //sizeId, listId is current to-be-coded matrix idx { - int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries. + int deltaValue = 0; + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + int coefNum = matrixSize*matrixSize; + ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)]; + int nextCoef = 0; - for(int predListIdx = (int)listId ; predListIdx >= 0; predListIdx-=predListStep) + int8_t data; + const int *src = scalingList; + const int *srcPred = scalingListPred; + if (scalingListDC!=-1 && scalingListPredDC!=-1) { - if( !::memcmp(getScalingListAddress(sizeId,listId),((listId == predListIdx) ? 
- getScalingListDefaultAddress(sizeId, predListIdx): getScalingListAddress(sizeId, predListIdx)),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])) // check value of matrix - && ((sizeId < SCALING_LIST_16x16) || (getScalingListDC(sizeId,listId) == getScalingListDC(sizeId,predListIdx)))) // check DC value - { - setRefMatrixId(sizeId, listId, predListIdx); - setScalingListPredModeFlag(sizeId, listId, false); + bitsCost += lengthSvlc((int8_t)(scalingListDC - scalingListPredDC - nextCoef)); + nextCoef = scalingListDC - scalingListPredDC; + } + else if ((scalingListDC != -1 && scalingListPredDC == -1)) + { + bitsCost += lengthSvlc((int8_t)(scalingListDC - srcPred[scan[0].idx] - nextCoef)); + nextCoef = scalingListDC - srcPred[scan[0].idx]; + } + else if ((scalingListDC == -1 && scalingListPredDC == -1)) + { + } + else + { + printf("Predictor DC mismatch! \n"); + } + for (int i = 0; i < coefNum; i++) + { + if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4) + continue; + deltaValue = (src[scan[i].idx] - srcPred[scan[i].idx]); + data = (int8_t)(deltaValue - nextCoef); + nextCoef = deltaValue; + + bitsCost += lengthSvlc(data); + } +} +void ScalingList::codeScalingList(int* scalingList, int scalingListDC, int scalingListId, int& bitsCost) //sizeId, listId is current to-be-coded matrix idx +{ + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 
4 : 8; + int coefNum = matrixSize * matrixSize; + ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)]; + int nextCoef = SCALING_LIST_START_VALUE; + int8_t data; + const int *src = scalingList; + + if (scalingListId >= SCALING_LIST_1D_START_16x16) + { + bitsCost += lengthSvlc(int8_t(getScalingListDC(scalingListId) - nextCoef)); + nextCoef = getScalingListDC(scalingListId); + } + + for (int i = 0; i < coefNum; i++) + { + if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4) + continue; + data = int8_t(src[scan[i].idx] - nextCoef); + nextCoef = src[scan[i].idx]; + + bitsCost += lengthSvlc(data); + } +} +void ScalingList::CheckBestPredScalingList(int scalingListId, int predListId, int& BitsCount) +{ + //check previously coded matrix as a predictor, code "lengthUvlc" function + int *scalingList = getScalingListAddress(scalingListId); + const int *scalingListPred = (scalingListId == predListId) ? ((predListId < SCALING_LIST_1D_START_8x8) ? g_quantTSDefault4x4 : g_quantIntraDefault8x8) : getScalingListAddress(predListId); + int scalingListDC = (scalingListId >= SCALING_LIST_1D_START_16x16) ? getScalingListDC(scalingListId) : -1; + int scalingListPredDC = (predListId >= SCALING_LIST_1D_START_16x16) ? ((scalingListId == predListId) ? 16 : getScalingListDC(predListId)) : -1; + + int bitsCost = 0; + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + int predMatrixSize = (predListId < SCALING_LIST_1D_START_4x4) ? 2 : (predListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + + if (matrixSize != predMatrixSize) printf("Predictor size mismatch! 
\n"); + + bitsCost = 2 + lengthUvlc(scalingListId - predListId); + //copy-flag + predictor-mode-flag + deltaListId + codePredScalingList(scalingList, scalingListPred, scalingListDC, scalingListPredDC, scalingListId, bitsCost); + BitsCount = bitsCost; +} +void ScalingList::processRefMatrix(uint32_t scalinListId, uint32_t refListId) +{ + int matrixSize = (scalinListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalinListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + ::memcpy(getScalingListAddress(scalinListId), ((scalinListId == refListId) ? getScalingListDefaultAddress(refListId) : getScalingListAddress(refListId)), sizeof(int)*matrixSize*matrixSize); +} +void ScalingList::checkPredMode(uint32_t scalingListId) +{ + int bestBitsCount = MAX_INT; + int BitsCount = 2; + setScalingListPreditorModeFlag(scalingListId, false); + codeScalingList(getScalingListAddress(scalingListId), ((scalingListId >= SCALING_LIST_1D_START_16x16) ? getScalingListDC(scalingListId) : -1), scalingListId, BitsCount); + bestBitsCount = BitsCount; + + for (int predListIdx = (int)scalingListId; predListIdx >= 0; predListIdx--) + { + + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + int predMatrixSize = (predListIdx < SCALING_LIST_1D_START_4x4) ? 2 : (predListIdx < SCALING_LIST_1D_START_8x8) ? 4 : 8; + if (((scalingListId == SCALING_LIST_1D_START_2x2 || scalingListId == SCALING_LIST_1D_START_4x4 || scalingListId == SCALING_LIST_1D_START_8x8) && predListIdx != (int)scalingListId) || matrixSize != predMatrixSize) + continue; + const int* refScalingList = (scalingListId == predListIdx) ? getScalingListDefaultAddress(predListIdx) : getScalingListAddress(predListIdx); + const int refDC = (predListIdx < SCALING_LIST_1D_START_16x16) ? refScalingList[0] : (scalingListId == predListIdx) ? 
16 : getScalingListDC(predListIdx); + if (!::memcmp(getScalingListAddress(scalingListId), refScalingList, sizeof(int)*matrixSize*matrixSize) // check value of matrix + // check DC value + && (scalingListId < SCALING_LIST_1D_START_16x16 || getScalingListDC(scalingListId) == refDC)) + { + //copy mode + setRefMatrixId(scalingListId, predListIdx); + setScalingListCopyModeFlag(scalingListId, true); + setScalingListPreditorModeFlag(scalingListId, false); return; } + else + { + //predictor mode + //use previously coded matrix as a predictor + CheckBestPredScalingList(scalingListId, predListIdx, BitsCount); + if (BitsCount < bestBitsCount) + { + bestBitsCount = BitsCount; + setScalingListCopyModeFlag(scalingListId, false); + setScalingListPreditorModeFlag(scalingListId, true); + setRefMatrixId(scalingListId, predListIdx); + } + } } - setScalingListPredModeFlag(sizeId, listId, true); + setScalingListCopyModeFlag(scalingListId, false); } static void outputScalingListHelp(std::ostream &os) @@ -2187,11 +2631,11 @@ static void outputScalingListHelp(std::ostream &os) " <value>\n"; os << "The permitted matrix names are:\n"; - for(uint32_t sizeIdc = 0; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++) + for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++) { - for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++) + for (uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++) { - if ((sizeIdc!=SCALING_LIST_32x32) || (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) == 0)) + if (!(((sizeIdc == SCALING_LIST_64x64) && (listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0)) || ((sizeIdc == SCALING_LIST_2x2) && (listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) == 0)))) { os << " " << MatrixType[sizeIdc][listIdc] << '\n'; } @@ -2201,14 +2645,15 @@ static void outputScalingListHelp(std::ostream &os) void ScalingList::outputScalingLists(std::ostream &os) const { - for(uint32_t sizeIdc = 0; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++) 
+ int scalingListId = 0; + for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++) { - const uint32_t size = std::min(8,4<<(sizeIdc)); + const uint32_t size = (sizeIdc == 1) ? 2 : ((sizeIdc == 2) ? 4 : 8); for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++) { - if ((sizeIdc!=SCALING_LIST_32x32) || (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) == 0)) + if (!((sizeIdc== SCALING_LIST_64x64 && listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) || (sizeIdc == SCALING_LIST_2x2 && listIdc < 4))) { - const int *src = getScalingListAddress(sizeIdc, listIdc); + const int *src = getScalingListAddress(scalingListId); os << (MatrixType[sizeIdc][listIdc]) << " =\n "; for(uint32_t y=0; y<size; y++) { @@ -2220,9 +2665,10 @@ void ScalingList::outputScalingLists(std::ostream &os) const } if(sizeIdc > SCALING_LIST_8x8) { - os << MatrixType_DC[sizeIdc][listIdc] << " = \n " << std::setw(3) << getScalingListDC(sizeIdc, listIdc) << "\n"; + os << MatrixType_DC[sizeIdc][listIdc] << " = \n " << std::setw(3) << getScalingListDC(scalingListId) << "\n"; } os << "\n"; + scalingListId++; } } } @@ -2248,25 +2694,21 @@ bool ScalingList::xParseScalingList(const std::string &fileName) return true; } - for(uint32_t sizeIdc = SCALING_LIST_FIRST_CODED; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++) + int scalingListId = 0; + for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++)//2x2-128x128 { const uint32_t size = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeIdc]); for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++) { - int * const src = getScalingListAddress(sizeIdc, listIdc); - if ((sizeIdc==SCALING_LIST_32x32) && (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) != 0)) // derive chroma32x32 from chroma16x16 + if ((sizeIdc == SCALING_LIST_64x64 && listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) || (sizeIdc == SCALING_LIST_2x2 && listIdc < 4)) { - const int 
*srcNextSmallerSize = getScalingListAddress(sizeIdc-1, listIdc); - for(uint32_t i=0; i<size; i++) - { - src[i] = srcNextSmallerSize[i]; - } - setScalingListDC(sizeIdc,listIdc,(sizeIdc > SCALING_LIST_8x8) ? getScalingListDC(sizeIdc-1, listIdc) : src[0]); + continue; } else { + int * const src = getScalingListAddress(scalingListId); { fseek(fp, 0, SEEK_SET); bool bFound=false; @@ -2284,6 +2726,7 @@ bool ScalingList::xParseScalingList(const std::string &fileName) { msg( ERROR, "Error: cannot find Matrix %s from scaling list file %s\n", MatrixType[sizeIdc][listIdc], fileName.c_str()); return true; + } } for (uint32_t i=0; i<size; i++) @@ -2303,7 +2746,7 @@ bool ScalingList::xParseScalingList(const std::string &fileName) } //set DC value for default matrix check - setScalingListDC(sizeIdc,listIdc,src[0]); + setScalingListDC(scalingListId, src[0]); if(sizeIdc > SCALING_LIST_8x8) { @@ -2338,9 +2781,10 @@ bool ScalingList::xParseScalingList(const std::string &fileName) return true; } //overwrite DC value when size of matrix is larger than 16x16 - setScalingListDC(sizeIdc,listIdc,data); + setScalingListDC(scalingListId, data); } } + scalingListId++; } } // std::cout << "\n\nRead scaling lists of:\n\n"; @@ -2356,11 +2800,13 @@ bool ScalingList::xParseScalingList(const std::string &fileName) * \param listId list index * \returns pointer of quantization matrix */ -const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t listId) +const int* ScalingList::getScalingListDefaultAddress(uint32_t scalingListId) { const int *src = 0; - switch(sizeId) + int sizeId = (scalingListId < SCALING_LIST_1D_START_8x8) ? 
2 : 3; + switch (sizeId) { + case SCALING_LIST_1x1: case SCALING_LIST_2x2: case SCALING_LIST_4x4: src = g_quantTSDefault4x4; @@ -2370,7 +2816,7 @@ const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t l case SCALING_LIST_32x32: case SCALING_LIST_64x64: case SCALING_LIST_128x128: - src = (listId < (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) ) ? g_quantIntraDefault8x8 : g_quantInterDefault8x8; + src = g_quantInterDefault8x8; break; default: THROW( "Invalid scaling list" ); @@ -2384,43 +2830,36 @@ const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t l * \param sizeId size index * \param listId index of input matrix */ -void ScalingList::processDefaultMatrix(uint32_t sizeId, uint32_t listId) +void ScalingList::processDefaultMatrix(uint32_t scalingListId) { - ::memcpy(getScalingListAddress(sizeId, listId),getScalingListDefaultAddress(sizeId,listId),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])); - setScalingListDC(sizeId,listId,SCALING_LIST_DC); + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + ::memcpy(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int)*matrixSize*matrixSize); + setScalingListDC(scalingListId, SCALING_LIST_DC); } /** check DC value of matrix for default matrix signaling */ void ScalingList::checkDcOfMatrix() { - for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) + //check default matrix? + if (getScalingListDC(scalingListId) == 0) { - //check default matrix? 
- if(getScalingListDC(sizeId,listId) == 0) - { - processDefaultMatrix(sizeId, listId); - } + processDefaultMatrix(scalingListId); } } } -#endif ParameterSetManager::ParameterSetManager() -#if HEVC_VPS -: m_vpsMap(MAX_NUM_VPS) -, m_spsMap(MAX_NUM_SPS) -#else : m_spsMap(MAX_NUM_SPS) -#endif , m_ppsMap(MAX_NUM_PPS) -, m_apsMap(MAX_NUM_APS) -#if HEVC_VPS -, m_activeVPSId(-1) -#endif +, m_apsMap(MAX_NUM_APS * MAX_NUM_APS_TYPE) +, m_dpsMap(MAX_NUM_DPS) +, m_vpsMap(MAX_NUM_VPS) +, m_activeDPSId(-1) , m_activeSPSId(-1) +, m_activeVPSId(-1) { } @@ -2456,11 +2895,7 @@ ParameterSetManager::~ParameterSetManager() // return false; //} -#if HEVC_VPS -//! activate a PPS and depending on isIDR parameter also SPS and VPS -#else //! activate a PPS and depending on isIDR parameter also SPS -#endif //! \returns true, if activation is successful bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP) { @@ -2477,36 +2912,62 @@ bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP) SPS *sps = m_spsMap.getPS(spsId); if (sps) { - -#if HEVC_VPS - int vpsId = sps->getVPSId(); - if (!isIRAP && (vpsId != m_activeVPSId )) + int dpsId = sps->getDecodingParameterSetId(); + if ((m_activeDPSId!=-1) && (dpsId != m_activeDPSId )) { - msg( WARNING, "Warning: tried to activate PPS referring to a inactive VPS at non-IDR."); + msg( WARNING, "Warning: tried to activate DPS with different ID than the currently active DPS. 
This should not happen within the same bitstream!"); } else { -#endif - m_spsMap.setActive(spsId); -#if HEVC_VPS - VPS *vps =m_vpsMap.getPS(vpsId); - if (vps) + if (dpsId != 0) { - m_activeVPSId = vpsId; - m_activeSPSId = spsId; - m_ppsMap.setActive(ppsId); - return true; + DPS *dps =m_dpsMap.getPS(dpsId); + if (dps) + { + m_activeDPSId = dpsId; + m_dpsMap.setActive(dpsId); + } + else + { + msg( WARNING, "Warning: tried to activate PPS that refers to a non-existing DPS."); + } } else { - msg( WARNING, "Warning: tried to activate PPS that refers to a non-existing VPS."); + // set zero as active DPS ID (special reserved value, no actual DPS) + m_activeDPSId = dpsId; + m_dpsMap.setActive(dpsId); } } -#else + + int vpsId = sps->getVPSId(); + if(vpsId != 0) + { + VPS *vps = m_vpsMap.getPS(vpsId); + if(vps) + { + m_activeVPSId = vpsId; + m_vpsMap.setActive(vpsId); + } + else + { + msg( WARNING, "Warning: tried to activate PPS that refers to non-existing VPS." ); + } + } + else + { + m_vpsMap.clear(); + m_vpsMap.allocatePS(0); + m_activeVPSId = 0; + m_vpsMap.setActive(0); + } + + m_spsMap.clear(); + m_spsMap.setActive(spsId); m_activeSPSId = spsId; + m_ppsMap.clear(); m_ppsMap.setActive(ppsId); return true; -#endif } else { @@ -2521,18 +2982,16 @@ bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP) // Failed to activate if reach here. 
m_activeSPSId=-1; -#if HEVC_VPS - m_activeVPSId=-1; -#endif + m_activeDPSId=-1; return false; } -bool ParameterSetManager::activateAPS(int apsId) +bool ParameterSetManager::activateAPS(int apsId, int apsType) { - APS *aps = m_apsMap.getPS(apsId); + APS *aps = m_apsMap.getPS(apsId + (MAX_NUM_APS * apsType)); if (aps) { - m_apsMap.setActive(apsId); + m_apsMap.setActive(apsId + (MAX_NUM_APS * apsType)); return true; } else @@ -2559,35 +3018,22 @@ void ParameterSetMap<SPS>::setID(SPS* parameterSet, const int psId) parameterSet->setSPSId(psId); } -#if !JVET_M0101_HLS -ProfileTierLevel::ProfileTierLevel() - : m_profileSpace (0) - , m_tierFlag (Level::MAIN) - , m_profileIdc (Profile::NONE) - , m_levelIdc (Level::NONE) - , m_progressiveSourceFlag (false) - , m_interlacedSourceFlag (false) - , m_nonPackedConstraintFlag(false) - , m_frameOnlyConstraintFlag(false) +template <> +void ParameterSetMap<VPS>::setID(VPS* parameterSet, const int psId) { - ::memset(m_profileCompatibilityFlag, 0, sizeof(m_profileCompatibilityFlag)); + parameterSet->setVPSId(psId); } -PTL::PTL() -{ - ::memset(m_subLayerProfilePresentFlag, 0, sizeof(m_subLayerProfilePresentFlag)); - ::memset(m_subLayerLevelPresentFlag, 0, sizeof(m_subLayerLevelPresentFlag )); -} -#else ProfileTierLevel::ProfileTierLevel() : m_tierFlag (Level::MAIN) , m_profileIdc (Profile::NONE) + , m_numSubProfile(0) + , m_subProfileIdc(0) , m_levelIdc (Level::NONE) { ::memset(m_subLayerLevelPresentFlag, 0, sizeof(m_subLayerLevelPresentFlag )); ::memset(m_subLayerLevelIdc, Level::NONE, sizeof(m_subLayerLevelIdc )); } -#endif void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t> *pOldData, const std::vector<uint8_t> *pNewData) { @@ -2621,13 +3067,13 @@ void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t> uint32_t PreCalcValues::getValIdx( const Slice &slice, const ChannelType chType ) const { - return slice.isIRAP() ? ( ISingleTree ? 
0 : ( chType << 1 ) ) : 1; + return slice.isIntra() ? ( ISingleTree ? 0 : ( chType << 1 ) ) : 1; } uint32_t PreCalcValues::getMaxBtDepth( const Slice &slice, const ChannelType chType ) const { - if ( slice.getSplitConsOverrideFlag() ) - return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxBTDepth() : slice.getMaxBTDepthIChroma(); + if ( slice.getPicHeader()->getSplitConsOverrideFlag() ) + return slice.getPicHeader()->getMaxMTTHierarchyDepth( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType); else return maxBtDepth[getValIdx( slice, chType )]; } @@ -2639,8 +3085,8 @@ uint32_t PreCalcValues::getMinBtSize( const Slice &slice, const ChannelType chTy uint32_t PreCalcValues::getMaxBtSize( const Slice &slice, const ChannelType chType ) const { - if (slice.getSplitConsOverrideFlag()) - return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxBTSize() : slice.getMaxBTSizeIChroma(); + if (slice.getPicHeader()->getSplitConsOverrideFlag()) + return slice.getPicHeader()->getMaxBTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType); else return maxBtSize[getValIdx(slice, chType)]; } @@ -2652,26 +3098,209 @@ uint32_t PreCalcValues::getMinTtSize( const Slice &slice, const ChannelType chTy uint32_t PreCalcValues::getMaxTtSize( const Slice &slice, const ChannelType chType ) const { - if ( slice.getSplitConsOverrideFlag() ) - return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxTTSize() : slice.getMaxTTSizeIChroma(); + if (slice.getPicHeader()->getSplitConsOverrideFlag()) + return slice.getPicHeader()->getMaxTTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType); else return maxTtSize[getValIdx( slice, chType )]; } uint32_t PreCalcValues::getMinQtSize( const Slice &slice, const ChannelType chType ) const { - if ( slice.getSplitConsOverrideFlag() ) - return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? 
slice.getMinQTSize() : slice.getMinQTSizeIChroma(); + if (slice.getPicHeader()->getSplitConsOverrideFlag()) + return slice.getPicHeader()->getMinQTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType); else return minQtSize[getValIdx( slice, chType )]; } +void Slice::scaleRefPicList( Picture *scaledRefPic[ ], PicHeader *picHeader, APS** apss, APS* lmcsAps, APS* scalingListAps, const bool isDecoder ) +{ + int i; + const SPS* sps = getSPS(); + const PPS* pps = getPPS(); + + bool refPicIsSameRes = false; + + // this is needed for IBC + m_pcPic->unscaledPic = m_pcPic; + + if( m_eSliceType == I_SLICE ) + { + return; + } + + freeScaledRefPicList( scaledRefPic ); + + for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ ) + { + if( refList == 1 && m_eSliceType != B_SLICE ) + { + continue; + } + + for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ ) + { + // if rescaling is needed, otherwise just reuse the original picture pointer; it is needed for motion field, otherwise motion field requires a copy as well + // reference resampling for the whole picture is not applied at decoder + + int xScale, yScale; + CU::getRprScaling( sps, pps, m_apcRefPicList[refList][rIdx], xScale, yScale ); + m_scalingRatio[refList][rIdx] = std::pair<int, int>( xScale, yScale ); + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( m_apcRefPicList[refList][rIdx]->isRefScaled( pps ) == false ) +#else + if( m_scalingRatio[refList][rIdx] == SCALE_1X && pps->getPicWidthInLumaSamples() == m_apcRefPicList[refList][rIdx]->getPicWidthInLumaSamples() && pps->getPicHeightInLumaSamples() == m_apcRefPicList[refList][rIdx]->getPicHeightInLumaSamples() ) +#endif + { + refPicIsSameRes = true; + } + + if( m_scalingRatio[refList][rIdx] == SCALE_1X || isDecoder ) + { + m_scaledRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx]; + } + else + { + int poc = m_apcRefPicList[refList][rIdx]->getPOC(); + // check whether the reference picture has already been scaled + for( i = 0; i < 
MAX_NUM_REF; i++ ) + { + if( scaledRefPic[i] != nullptr && scaledRefPic[i]->poc == poc ) + { + break; + } + } + + if( i == MAX_NUM_REF ) + { + int j; + // search for unused Picture structure in scaledRefPic + for( j = 0; j < MAX_NUM_REF; j++ ) + { + if( scaledRefPic[j] == nullptr ) + { + break; + } + } + + CHECK( j >= MAX_NUM_REF, "scaledRefPic can not hold all reference pictures!" ); + + if( j >= MAX_NUM_REF ) + { + j = 0; + } + + if( scaledRefPic[j] == nullptr ) + { + scaledRefPic[j] = new Picture; + + scaledRefPic[j]->setBorderExtension( false ); + scaledRefPic[j]->reconstructed = false; + scaledRefPic[j]->referenced = true; + + scaledRefPic[j]->finalInit( m_pcPic->cs->vps, *sps, *pps, picHeader, apss, lmcsAps, scalingListAps ); + + scaledRefPic[j]->poc = -1; + + scaledRefPic[j]->create( sps->getChromaFormatIdc(), Size( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples() ), sps->getMaxCUWidth(), sps->getMaxCUWidth() + 16, isDecoder, m_pcPic->layerId ); + } + + scaledRefPic[j]->poc = poc; + scaledRefPic[j]->longTerm = m_apcRefPicList[refList][rIdx]->longTerm; + + // rescale the reference picture + const bool downsampling = m_apcRefPicList[refList][rIdx]->getRecoBuf().Y().width >= scaledRefPic[j]->getRecoBuf().Y().width && m_apcRefPicList[refList][rIdx]->getRecoBuf().Y().height >= scaledRefPic[j]->getRecoBuf().Y().height; + Picture::rescalePicture( m_scalingRatio[refList][rIdx], + m_apcRefPicList[refList][rIdx]->getRecoBuf(), m_apcRefPicList[refList][rIdx]->slices[0]->getPPS()->getScalingWindow(), + scaledRefPic[j]->getRecoBuf(), pps->getScalingWindow(), + sps->getChromaFormatIdc(), sps->getBitDepths(), true, downsampling, + sps->getHorCollocatedChromaFlag(), sps->getVerCollocatedChromaFlag() ); + scaledRefPic[j]->extendPicBorder(); + + m_scaledRefPicList[refList][rIdx] = scaledRefPic[j]; + } + else + { + m_scaledRefPicList[refList][rIdx] = scaledRefPic[i]; + } + } + } + } + + // make the scaled reference picture list as the default reference picture 
list + for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ ) + { + if( refList == 1 && m_eSliceType != B_SLICE ) + { + continue; + } + + for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ ) + { + m_savedRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx]; + m_apcRefPicList[refList][rIdx] = m_scaledRefPicList[refList][rIdx]; + + // allow the access of the unscaled version in xPredInterBlk() + m_apcRefPicList[refList][rIdx]->unscaledPic = m_savedRefPicList[refList][rIdx]; + } + } + + //Make sure that TMVP is disabled when there are no reference pictures with the same resolution + if(!refPicIsSameRes) + { + CHECK(getPicHeader()->getEnableTMVPFlag() != 0, "TMVP cannot be enabled in pictures that have no reference pictures with the same resolution") + } +} + +void Slice::freeScaledRefPicList( Picture *scaledRefPic[] ) +{ + if( m_eSliceType == I_SLICE ) + { + return; + } + for( int i = 0; i < MAX_NUM_REF; i++ ) + { + if( scaledRefPic[i] != nullptr ) + { + scaledRefPic[i]->destroy(); + scaledRefPic[i] = nullptr; + } + } +} + +bool Slice::checkRPR() +{ + const PPS* pps = getPPS(); + + for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ ) + { + + if( refList == 1 && m_eSliceType != B_SLICE ) + { + continue; + } + + for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ ) + { + if( m_scaledRefPicList[refList][rIdx]->cs->pcv->lumaWidth != pps->getPicWidthInLumaSamples() || m_scaledRefPicList[refList][rIdx]->cs->pcv->lumaHeight != pps->getPicHeightInLumaSamples() ) + { + return true; + } + } + } + + return false; +} + #if ENABLE_TRACING -#if HEVC_VPS void xTraceVPSHeader() { DTRACE( g_trace_ctx, D_HEADER, "=========== Video Parameter Set ===========\n" ); } -#endif + +void xTraceDPSHeader() +{ + DTRACE( g_trace_ctx, D_HEADER, "=========== Decoding Parameter Set ===========\n" ); +} void xTraceSPSHeader() { @@ -2688,6 +3317,11 @@ void xTraceAPSHeader() DTRACE(g_trace_ctx, D_HEADER, "=========== Adaptation Parameter Set ===========\n"); } 
+void xTracePictureHeader() +{ + DTRACE( g_trace_ctx, D_HEADER, "=========== Picture Header ===========\n" ); +} + void xTraceSliceHeader() { DTRACE( g_trace_ctx, D_HEADER, "=========== Slice ===========\n" ); diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 3ccca6122a9690e81be248ce587ec8d6b9ddcedb..fa723a03d733cc43496646418f2c66d28ed43eac 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,6 +46,9 @@ #include "Rom.h" #include "ChromaFormat.h" #include "Common.h" +#include "HRD.h" +#include <unordered_map> +#include "AlfParameters.h" //! \ingroup CommonLib //! \{ @@ -68,130 +71,161 @@ typedef std::list<Picture*> PicList; // Class definition // ==================================================================================================================== -/// Reference Picture Set class -class ReferencePictureSet + +class ReferencePictureList { private: - int m_numberOfPictures; - int m_numberOfNegativePictures; - int m_numberOfPositivePictures; - int m_numberOfLongtermPictures; - int m_deltaPOC[MAX_NUM_REF_PICS]; - int m_POC[MAX_NUM_REF_PICS]; - bool m_used[MAX_NUM_REF_PICS]; - bool m_interRPSPrediction; - int m_deltaRIdxMinus1; - int m_deltaRPS; - int m_numRefIdc; - int m_refIdc[MAX_NUM_REF_PICS+1]; - bool m_bCheckLTMSB[MAX_NUM_REF_PICS]; - int m_pocLSBLT[MAX_NUM_REF_PICS]; - int m_deltaPOCMSBCycleLT[MAX_NUM_REF_PICS]; - bool m_deltaPocMSBPresentFlag[MAX_NUM_REF_PICS]; + int m_numberOfShorttermPictures; + int m_numberOfLongtermPictures; + int m_isLongtermRefPic[MAX_NUM_REF_PICS]; + int m_refPicIdentifier[MAX_NUM_REF_PICS]; //This can be delta POC for STRP or POC LSB for LTRP + int m_POC[MAX_NUM_REF_PICS]; 
+ int m_numberOfActivePictures; + bool m_deltaPocMSBPresentFlag[MAX_NUM_REF_PICS]; + int m_deltaPOCMSBCycleLT[MAX_NUM_REF_PICS]; + bool m_ltrp_in_slice_header_flag; + bool m_interLayerPresentFlag; + bool m_isInterLayerRefPic[MAX_NUM_REF_PICS]; + int m_interLayerRefPicIdx[MAX_NUM_REF_PICS]; + int m_numberOfInterLayerPictures; public: - ReferencePictureSet(); - virtual ~ReferencePictureSet(); - int getPocLSBLT(int i) const { return m_pocLSBLT[i]; } - void setPocLSBLT(int i, int x) { m_pocLSBLT[i] = x; } - int getDeltaPocMSBCycleLT(int i) const { return m_deltaPOCMSBCycleLT[i]; } - void setDeltaPocMSBCycleLT(int i, int x) { m_deltaPOCMSBCycleLT[i] = x; } - bool getDeltaPocMSBPresentFlag(int i) const { return m_deltaPocMSBPresentFlag[i]; } - void setDeltaPocMSBPresentFlag(int i, bool x) { m_deltaPocMSBPresentFlag[i] = x; } - void setUsed(int bufferNum, bool used); - void setDeltaPOC(int bufferNum, int deltaPOC); - void setPOC(int bufferNum, int deltaPOC); - void setNumberOfPictures(int numberOfPictures); - void setCheckLTMSBPresent(int bufferNum, bool b ); - bool getCheckLTMSBPresent(int bufferNum) const; - - int getUsed(int bufferNum) const; - int getDeltaPOC(int bufferNum) const; - int getPOC(int bufferNum) const; - int getNumberOfPictures() const; - - void setNumberOfNegativePictures(int number) { m_numberOfNegativePictures = number; } - int getNumberOfNegativePictures() const { return m_numberOfNegativePictures; } - void setNumberOfPositivePictures(int number) { m_numberOfPositivePictures = number; } - int getNumberOfPositivePictures() const { return m_numberOfPositivePictures; } - void setNumberOfLongtermPictures(int number) { m_numberOfLongtermPictures = number; } - int getNumberOfLongtermPictures() const { return m_numberOfLongtermPictures; } - - void setInterRPSPrediction(bool flag) { m_interRPSPrediction = flag; } - bool getInterRPSPrediction() const { return m_interRPSPrediction; } - void setDeltaRIdxMinus1(int x) { m_deltaRIdxMinus1 = x; } - int 
getDeltaRIdxMinus1() const { return m_deltaRIdxMinus1; } - void setDeltaRPS(int x) { m_deltaRPS = x; } - int getDeltaRPS() const { return m_deltaRPS; } - void setNumRefIdc(int x) { m_numRefIdc = x; } - int getNumRefIdc() const { return m_numRefIdc; } - - void setRefIdc(int bufferNum, int refIdc); - int getRefIdc(int bufferNum) const ; - - void sortDeltaPOC(); - void printDeltaPOC() const; + ReferencePictureList( const bool interLayerPicPresentFlag = false ); + virtual ~ReferencePictureList(); + + void setRefPicIdentifier( int idx, int identifier, bool isLongterm, bool isInterLayerRefPic, int interLayerIdx ); + int getRefPicIdentifier(int idx) const; + bool isRefPicLongterm(int idx) const; + + void setNumberOfShorttermPictures(int numberOfStrp); + int getNumberOfShorttermPictures() const; + + void setNumberOfLongtermPictures(int numberOfLtrp); + int getNumberOfLongtermPictures() const; + + void setLtrpInSliceHeaderFlag(bool flag) { m_ltrp_in_slice_header_flag = flag; } + bool getLtrpInSliceHeaderFlag() const { return m_ltrp_in_slice_header_flag; } + + void setNumberOfInterLayerPictures( const int numberOfIlrp ) { m_numberOfInterLayerPictures = numberOfIlrp; } + int getNumberOfInterLayerPictures() const { return m_numberOfInterLayerPictures; } + + int getNumRefEntries() const { return m_numberOfShorttermPictures + m_numberOfLongtermPictures + m_numberOfInterLayerPictures; } + + void setPOC(int idx, int POC); + int getPOC(int idx) const; + + void setNumberOfActivePictures(int numberOfLtrp); + int getNumberOfActivePictures() const; + + int getDeltaPocMSBCycleLT(int i) const { return m_deltaPOCMSBCycleLT[i]; } + void setDeltaPocMSBCycleLT(int i, int x) { m_deltaPOCMSBCycleLT[i] = x; } + bool getDeltaPocMSBPresentFlag(int i) const { return m_deltaPocMSBPresentFlag[i]; } + void setDeltaPocMSBPresentFlag(int i, bool x) { m_deltaPocMSBPresentFlag[i] = x; } + + void printRefPicInfo() const; + + bool getInterLayerPresentFlag() const { return m_interLayerPresentFlag; } + void 
setInterLayerPresentFlag( bool b ) { m_interLayerPresentFlag = b; } + bool isInterLayerRefPic( int idx ) const { return m_isInterLayerRefPic[idx]; } + int getInterLayerRefPicIdx( int idx ) const { return m_interLayerRefPicIdx[idx]; } + void setInterLayerRefPicIdx( int idx, int layerIdc ) { m_interLayerRefPicIdx[idx] = layerIdc; } }; -/// Reference Picture Set set class -class RPSList +/// Reference Picture List set class +class RPLList { private: - std::vector<ReferencePictureSet> m_referencePictureSets; + std::vector<ReferencePictureList> m_referencePictureLists; public: - RPSList() { } - virtual ~RPSList() { } + RPLList() { } + virtual ~RPLList() { } - void create (int numberOfEntries) { m_referencePictureSets.resize(numberOfEntries); } - void destroy () { } + void create(int numberOfEntries) { m_referencePictureLists.resize(numberOfEntries); } + void destroy() { } - ReferencePictureSet* getReferencePictureSet(int referencePictureSetNum) { return &m_referencePictureSets[referencePictureSetNum]; } - const ReferencePictureSet* getReferencePictureSet(int referencePictureSetNum) const { return &m_referencePictureSets[referencePictureSetNum]; } + ReferencePictureList* getReferencePictureList(int referencePictureListIdx) { return &m_referencePictureLists[referencePictureListIdx]; } + const ReferencePictureList* getReferencePictureList(int referencePictureListIdx) const { return &m_referencePictureLists[referencePictureListIdx]; } - int getNumberOfReferencePictureSets() const { return int(m_referencePictureSets.size()); } + int getNumberOfReferencePictureLists() const { return int(m_referencePictureLists.size()); } }; -#if HEVC_USE_SCALING_LISTS /// SCALING_LIST class class ScalingList { public: ScalingList(); virtual ~ScalingList() { } - int* getScalingListAddress(uint32_t sizeId, uint32_t listId) { return &(m_scalingListCoef[sizeId][listId][0]); } //!< get matrix coefficient - const int* getScalingListAddress(uint32_t sizeId, uint32_t listId) const { return 
&(m_scalingListCoef[sizeId][listId][0]); } //!< get matrix coefficient - void checkPredMode(uint32_t sizeId, uint32_t listId); - - void setRefMatrixId(uint32_t sizeId, uint32_t listId, uint32_t u) { m_refMatrixId[sizeId][listId] = u; } //!< set reference matrix ID - uint32_t getRefMatrixId(uint32_t sizeId, uint32_t listId) const { return m_refMatrixId[sizeId][listId]; } //!< get reference matrix ID + bool getDisableScalingMatrixForLfnstBlks() const { return m_disableScalingMatrixForLfnstBlks;} + void setDisableScalingMatrixForLfnstBlks(bool flag) { m_disableScalingMatrixForLfnstBlks = flag;} + int* getScalingListAddress(uint32_t scalingListId) { return &(m_scalingListCoef[scalingListId][0]); } //!< get matrix coefficient + const int* getScalingListAddress(uint32_t scalingListId) const { return &(m_scalingListCoef[scalingListId][0]); } //!< get matrix coefficient + void checkPredMode(uint32_t scalingListId); + + void setRefMatrixId(uint32_t scalingListId, uint32_t u) { m_refMatrixId[scalingListId] = u; } //!< set reference matrix ID + uint32_t getRefMatrixId(uint32_t scalingListId) const { return m_refMatrixId[scalingListId]; } //!< get reference matrix ID + + static const int* getScalingListDefaultAddress(uint32_t scalinListId); //!< get default matrix coefficient + void processDefaultMatrix(uint32_t scalinListId); - const int* getScalingListDefaultAddress(uint32_t sizeId, uint32_t listId); //!< get default matrix coefficient - void processDefaultMatrix(uint32_t sizeId, uint32_t listId); + void setScalingListDC(uint32_t scalinListId, uint32_t u) { m_scalingListDC[scalinListId] = u; } //!< set DC value + int getScalingListDC(uint32_t scalinListId) const { return m_scalingListDC[scalinListId]; } //!< get DC value - void setScalingListDC(uint32_t sizeId, uint32_t listId, uint32_t u) { m_scalingListDC[sizeId][listId] = u; } //!< set DC value - int getScalingListDC(uint32_t sizeId, uint32_t listId) const { return m_scalingListDC[sizeId][listId]; } //!< get DC value + 
void setScalingListCopyModeFlag(uint32_t scalinListId, bool bIsCopy) { m_scalingListPredModeFlagIsCopy[scalinListId] = bIsCopy; } + bool getScalingListCopyModeFlag(uint32_t scalinListId) const { return m_scalingListPredModeFlagIsCopy[scalinListId]; } //getScalingListPredModeFlag + void processRefMatrix(uint32_t scalingListId, uint32_t refListId); - void setScalingListPredModeFlag(uint32_t sizeId, uint32_t listId, bool bIsDPCM) { m_scalingListPredModeFlagIsDPCM[sizeId][listId] = bIsDPCM; } - bool getScalingListPredModeFlag(uint32_t sizeId, uint32_t listId) const { return m_scalingListPredModeFlagIsDPCM[sizeId][listId]; } + int lengthUvlc(int uiCode); + int lengthSvlc(int uiCode); + void CheckBestPredScalingList(int scalingListId, int predListIdx, int& BitsCount); + void codePredScalingList(int* scalingList, const int* scalingListPred, int scalingListDC, int scalingListPredDC, int scalinListId, int& bitsCost); + void codeScalingList(int* scalingList, int scalingListDC, int scalinListId, int& bitsCost); + void setScalingListPreditorModeFlag(uint32_t scalingListId, bool bIsPred) { m_scalingListPreditorModeFlag[scalingListId] = bIsPred; } + bool getScalingListPreditorModeFlag(uint32_t scalingListId) const { return m_scalingListPreditorModeFlag[scalingListId]; } void checkDcOfMatrix(); - void processRefMatrix(uint32_t sizeId, uint32_t listId , uint32_t refListId ); bool xParseScalingList(const std::string &fileName); void setDefaultScalingList(); - bool checkDefaultScalingList(); + bool isNotDefaultScalingList(); + + bool operator==( const ScalingList& other ) + { + if (memcmp(m_scalingListPredModeFlagIsCopy, other.m_scalingListPredModeFlagIsCopy, sizeof(m_scalingListPredModeFlagIsCopy))) + { + return false; + } + if( memcmp( m_scalingListDC, other.m_scalingListDC, sizeof( m_scalingListDC ) ) ) + { + return false; + } + if( memcmp( m_refMatrixId, other.m_refMatrixId, sizeof( m_refMatrixId ) ) ) + { + return false; + } + if( memcmp( m_scalingListCoef, 
other.m_scalingListCoef, sizeof( m_scalingListCoef ) ) ) + { + return false; + } + + return true; + } + + bool operator!=( const ScalingList& other ) + { + return !( *this == other ); + } private: - void outputScalingLists(std::ostream &os) const; - bool m_scalingListPredModeFlagIsDPCM [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< reference list index - int m_scalingListDC [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< the DC value of the matrix coefficient for 16x16 - uint32_t m_refMatrixId [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< RefMatrixID - std::vector<int> m_scalingListCoef [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< quantization matrix + void outputScalingLists(std::ostream &os) const; + bool m_disableScalingMatrixForLfnstBlks; + bool m_scalingListPredModeFlagIsCopy [30]; //!< reference list index + int m_scalingListDC [30]; //!< the DC value of the matrix coefficient for 16x16 + uint32_t m_refMatrixId [30]; //!< RefMatrixID + bool m_scalingListPreditorModeFlag [30]; //!< reference list index + std::vector<int> m_scalingListCoef [30]; //!< quantization matrix }; -#endif -#if JVET_M0101_HLS class ConstraintInfo { bool m_progressiveSourceFlag; @@ -205,25 +239,39 @@ class ConstraintInfo bool m_lowerBitRateConstraintFlag; bool m_noQtbttDualTreeIntraConstraintFlag; + bool m_noPartitionConstraintsOverrideConstraintFlag; bool m_noSaoConstraintFlag; bool m_noAlfConstraintFlag; - bool m_noPcmConstraintFlag; bool m_noRefWraparoundConstraintFlag; bool m_noTemporalMvpConstraintFlag; bool m_noSbtmvpConstraintFlag; bool m_noAmvrConstraintFlag; bool m_noBdofConstraintFlag; + bool m_noDmvrConstraintFlag; bool m_noCclmConstraintFlag; bool m_noMtsConstraintFlag; + bool m_noSbtConstraintFlag; bool m_noAffineMotionConstraintFlag; - bool m_noGbiConstraintFlag; - bool m_noMhIntraConstraintFlag; + bool m_noBcwConstraintFlag; + bool m_noIbcConstraintFlag; + bool m_noCiipConstraintFlag; + bool m_noFPelMmvdConstraintFlag; bool m_noTriangleConstraintFlag; bool 
m_noLadfConstraintFlag; - bool m_noCurrPicRefConstraintFlag; + bool m_noTransformSkipConstraintFlag; + bool m_noBDPCMConstraintFlag; + bool m_noJointCbCrConstraintFlag; bool m_noQpDeltaConstraintFlag; bool m_noDepQuantConstraintFlag; bool m_noSignDataHidingConstraintFlag; + bool m_noTrailConstraintFlag; + bool m_noStsaConstraintFlag; + bool m_noRaslConstraintFlag; + bool m_noRadlConstraintFlag; + bool m_noIdrConstraintFlag; + bool m_noCraConstraintFlag; + bool m_noGdrConstraintFlag; + bool m_noApsConstraintFlag; public: ConstraintInfo() @@ -235,27 +283,41 @@ public: , m_maxBitDepthConstraintIdc ( 0) , m_maxChromaFormatConstraintIdc(CHROMA_420) , m_noQtbttDualTreeIntraConstraintFlag(false) + , m_noPartitionConstraintsOverrideConstraintFlag(false) , m_noSaoConstraintFlag (false) , m_noAlfConstraintFlag (false) - , m_noPcmConstraintFlag (false) , m_noRefWraparoundConstraintFlag(false) , m_noTemporalMvpConstraintFlag(false) , m_noSbtmvpConstraintFlag (false) , m_noAmvrConstraintFlag (false) , m_noBdofConstraintFlag (false) + , m_noDmvrConstraintFlag (false) , m_noCclmConstraintFlag (false) , m_noMtsConstraintFlag (false) + , m_noSbtConstraintFlag (false) , m_noAffineMotionConstraintFlag(false) - , m_noGbiConstraintFlag (false) - , m_noMhIntraConstraintFlag (false) + , m_noBcwConstraintFlag (false) + , m_noIbcConstraintFlag (false) + , m_noCiipConstraintFlag (false) + , m_noFPelMmvdConstraintFlag (false) , m_noTriangleConstraintFlag (false) , m_noLadfConstraintFlag (false) - , m_noCurrPicRefConstraintFlag(false) + , m_noTransformSkipConstraintFlag(false) + , m_noBDPCMConstraintFlag (false) + , m_noJointCbCrConstraintFlag (false) , m_noQpDeltaConstraintFlag (false) , m_noDepQuantConstraintFlag (false) , m_noSignDataHidingConstraintFlag(false) + , m_noTrailConstraintFlag (false) + , m_noStsaConstraintFlag (false) + , m_noRaslConstraintFlag (false) + , m_noRadlConstraintFlag (false) + , m_noIdrConstraintFlag (false) + , m_noCraConstraintFlag (false) + , 
m_noGdrConstraintFlag (false) + , m_noApsConstraintFlag (false) {} - + bool getProgressiveSourceFlag() const { return m_progressiveSourceFlag; } void setProgressiveSourceFlag(bool b) { m_progressiveSourceFlag = b; } @@ -285,12 +347,14 @@ public: bool getNoQtbttDualTreeIntraConstraintFlag() const { return m_noQtbttDualTreeIntraConstraintFlag; } void setNoQtbttDualTreeIntraConstraintFlag(bool bVal) { m_noQtbttDualTreeIntraConstraintFlag = bVal; } + bool getNoPartitionConstraintsOverrideConstraintFlag() const { return m_noPartitionConstraintsOverrideConstraintFlag; } + void setNoPartitionConstraintsOverrideConstraintFlag(bool bVal) { m_noPartitionConstraintsOverrideConstraintFlag = bVal; } bool getNoSaoConstraintFlag() const { return m_noSaoConstraintFlag; } void setNoSaoConstraintFlag(bool bVal) { m_noSaoConstraintFlag = bVal; } bool getNoAlfConstraintFlag() const { return m_noAlfConstraintFlag; } void setNoAlfConstraintFlag(bool bVal) { m_noAlfConstraintFlag = bVal; } - bool getNoPcmConstraintFlag() const { return m_noPcmConstraintFlag; } - void setNoPcmConstraintFlag(bool bVal) { m_noPcmConstraintFlag = bVal; } + bool getNoJointCbCrConstraintFlag() const { return m_noJointCbCrConstraintFlag; } + void setNoJointCbCrConstraintFlag(bool bVal) { m_noJointCbCrConstraintFlag = bVal; } bool getNoRefWraparoundConstraintFlag() const { return m_noRefWraparoundConstraintFlag; } void setNoRefWraparoundConstraintFlag(bool bVal) { m_noRefWraparoundConstraintFlag = bVal; } bool getNoTemporalMvpConstraintFlag() const { return m_noTemporalMvpConstraintFlag; } @@ -301,65 +365,70 @@ public: void setNoAmvrConstraintFlag(bool bVal) { m_noAmvrConstraintFlag = bVal; } bool getNoBdofConstraintFlag() const { return m_noBdofConstraintFlag; } void setNoBdofConstraintFlag(bool bVal) { m_noBdofConstraintFlag = bVal; } + bool getNoDmvrConstraintFlag() const { return m_noDmvrConstraintFlag; } + void setNoDmvrConstraintFlag(bool bVal) { m_noDmvrConstraintFlag = bVal; } bool 
getNoCclmConstraintFlag() const { return m_noCclmConstraintFlag; } void setNoCclmConstraintFlag(bool bVal) { m_noCclmConstraintFlag = bVal; } bool getNoMtsConstraintFlag() const { return m_noMtsConstraintFlag; } void setNoMtsConstraintFlag(bool bVal) { m_noMtsConstraintFlag = bVal; } + bool getNoSbtConstraintFlag() const { return m_noSbtConstraintFlag; } + void setNoSbtConstraintFlag(bool bVal) { m_noSbtConstraintFlag = bVal; } bool getNoAffineMotionConstraintFlag() const { return m_noAffineMotionConstraintFlag; } void setNoAffineMotionConstraintFlag(bool bVal) { m_noAffineMotionConstraintFlag = bVal; } - bool getNoGbiConstraintFlag() const { return m_noGbiConstraintFlag; } - void setNoGbiConstraintFlag(bool bVal) { m_noGbiConstraintFlag = bVal; } - bool getNoMhIntraConstraintFlag() const { return m_noMhIntraConstraintFlag; } - void setNoMhIntraConstraintFlag(bool bVal) { m_noMhIntraConstraintFlag = bVal; } + bool getNoBcwConstraintFlag() const { return m_noBcwConstraintFlag; } + void setNoBcwConstraintFlag(bool bVal) { m_noBcwConstraintFlag = bVal; } + bool getNoIbcConstraintFlag() const { return m_noIbcConstraintFlag; } + void setNoIbcConstraintFlag(bool bVal) { m_noIbcConstraintFlag = bVal; } + bool getNoCiipConstraintFlag() const { return m_noCiipConstraintFlag; } + void setNoCiipConstraintFlag(bool bVal) { m_noCiipConstraintFlag = bVal; } + bool getNoFPelMmvdConstraintFlag() const { return m_noFPelMmvdConstraintFlag; } + void setNoFPelMmvdConstraintFlag(bool bVal) { m_noFPelMmvdConstraintFlag = bVal; } bool getNoTriangleConstraintFlag() const { return m_noTriangleConstraintFlag; } void setNoTriangleConstraintFlag(bool bVal) { m_noTriangleConstraintFlag = bVal; } bool getNoLadfConstraintFlag() const { return m_noLadfConstraintFlag; } void setNoLadfConstraintFlag(bool bVal) { m_noLadfConstraintFlag = bVal; } - bool getNoCurrPicRefConstraintFlag() const { return m_noCurrPicRefConstraintFlag; } - void setNoCurrPicRefConstraintFlag(bool bVal) { 
m_noCurrPicRefConstraintFlag = bVal; } + bool getNoTransformSkipConstraintFlag() const { return m_noTransformSkipConstraintFlag; } + void setNoTransformSkipConstraintFlag(bool bVal) { m_noTransformSkipConstraintFlag = bVal; } + bool getNoBDPCMConstraintFlag() const { return m_noBDPCMConstraintFlag; } + void setNoBDPCMConstraintFlag(bool bVal) { m_noBDPCMConstraintFlag = bVal; } bool getNoQpDeltaConstraintFlag() const { return m_noQpDeltaConstraintFlag; } void setNoQpDeltaConstraintFlag(bool bVal) { m_noQpDeltaConstraintFlag = bVal; } bool getNoDepQuantConstraintFlag() const { return m_noDepQuantConstraintFlag; } void setNoDepQuantConstraintFlag(bool bVal) { m_noDepQuantConstraintFlag = bVal; } bool getNoSignDataHidingConstraintFlag() const { return m_noSignDataHidingConstraintFlag; } void setNoSignDataHidingConstraintFlag(bool bVal) { m_noSignDataHidingConstraintFlag = bVal; } + bool getNoTrailConstraintFlag() const { return m_noTrailConstraintFlag; } + void setNoTrailConstraintFlag(bool bVal) { m_noTrailConstraintFlag = bVal; } + bool getNoStsaConstraintFlag() const { return m_noStsaConstraintFlag; } + void setNoStsaConstraintFlag(bool bVal) { m_noStsaConstraintFlag = bVal; } + bool getNoRaslConstraintFlag() const { return m_noRaslConstraintFlag; } + void setNoRaslConstraintFlag(bool bVal) { m_noRaslConstraintFlag = bVal; } + bool getNoRadlConstraintFlag() const { return m_noRadlConstraintFlag; } + void setNoRadlConstraintFlag(bool bVal) { m_noRadlConstraintFlag = bVal; } + bool getNoIdrConstraintFlag() const { return m_noIdrConstraintFlag; } + void setNoIdrConstraintFlag(bool bVal) { m_noIdrConstraintFlag = bVal; } + bool getNoCraConstraintFlag() const { return m_noCraConstraintFlag; } + void setNoCraConstraintFlag(bool bVal) { m_noCraConstraintFlag = bVal; } + bool getNoGdrConstraintFlag() const { return m_noGdrConstraintFlag; } + void setNoGdrConstraintFlag(bool bVal) { m_noGdrConstraintFlag = bVal; } + bool getNoApsConstraintFlag() const { return 
m_noApsConstraintFlag; } + void setNoApsConstraintFlag(bool bVal) { m_noApsConstraintFlag = bVal; } }; -#endif class ProfileTierLevel { -#if !JVET_M0101_HLS - int m_profileSpace; -#endif Level::Tier m_tierFlag; Profile::Name m_profileIdc; -#if !JVET_M0101_HLS - bool m_profileCompatibilityFlag[32]; -#endif + uint8_t m_numSubProfile; + std::vector<uint32_t> m_subProfileIdc; Level::Name m_levelIdc; -#if !JVET_M0101_HLS - bool m_progressiveSourceFlag; - bool m_interlacedSourceFlag; - bool m_nonPackedConstraintFlag; - bool m_frameOnlyConstraintFlag; - uint32_t m_bitDepthConstraintValue; - ChromaFormat m_chromaFormatConstraintValue; - bool m_intraConstraintFlag; - bool m_onePictureOnlyConstraintFlag; - bool m_lowerBitRateConstraintFlag; -#else ConstraintInfo m_constraintInfo; bool m_subLayerLevelPresentFlag[MAX_TLAYER - 1]; - Level::Name m_subLayerLevelIdc[MAX_TLAYER - 1]; -#endif + Level::Name m_subLayerLevelIdc[MAX_TLAYER]; public: ProfileTierLevel(); -#if !JVET_M0101_HLS - int getProfileSpace() const { return m_profileSpace; } - void setProfileSpace(int x) { m_profileSpace = x; } -#endif Level::Tier getTierFlag() const { return m_tierFlag; } void setTierFlag(Level::Tier x) { m_tierFlag = x; } @@ -367,44 +436,16 @@ public: Profile::Name getProfileIdc() const { return m_profileIdc; } void setProfileIdc(Profile::Name x) { m_profileIdc = x; } -#if !JVET_M0101_HLS - bool getProfileCompatibilityFlag(int i) const { return m_profileCompatibilityFlag[i]; } - void setProfileCompatibilityFlag(int i, bool x) { m_profileCompatibilityFlag[i] = x; } -#endif + uint32_t getSubProfileIdc(int i) const { return m_subProfileIdc[i]; } + void setSubProfileIdc(int i, uint32_t x) { m_subProfileIdc[i] = x; } + + uint8_t getNumSubProfile() const { return m_numSubProfile; } + void setNumSubProfile(uint8_t x) { m_numSubProfile = x; m_subProfileIdc.resize(m_numSubProfile); } Level::Name getLevelIdc() const { return m_levelIdc; } void setLevelIdc(Level::Name x) { m_levelIdc = x; } -#if 
!JVET_M0101_HLS - bool getProgressiveSourceFlag() const { return m_progressiveSourceFlag; } - void setProgressiveSourceFlag(bool b) { m_progressiveSourceFlag = b; } - - bool getInterlacedSourceFlag() const { return m_interlacedSourceFlag; } - void setInterlacedSourceFlag(bool b) { m_interlacedSourceFlag = b; } - - bool getNonPackedConstraintFlag() const { return m_nonPackedConstraintFlag; } - void setNonPackedConstraintFlag(bool b) { m_nonPackedConstraintFlag = b; } - - bool getFrameOnlyConstraintFlag() const { return m_frameOnlyConstraintFlag; } - void setFrameOnlyConstraintFlag(bool b) { m_frameOnlyConstraintFlag = b; } - - uint32_t getBitDepthConstraint() const { return m_bitDepthConstraintValue; } - void setBitDepthConstraint(uint32_t bitDepth) { m_bitDepthConstraintValue=bitDepth; } - ChromaFormat getChromaFormatConstraint() const { return m_chromaFormatConstraintValue; } - void setChromaFormatConstraint(ChromaFormat fmt) { m_chromaFormatConstraintValue=fmt; } - - bool getIntraConstraintFlag() const { return m_intraConstraintFlag; } - void setIntraConstraintFlag(bool b) { m_intraConstraintFlag = b; } - - bool getOnePictureOnlyConstraintFlag() const { return m_onePictureOnlyConstraintFlag;} - void setOnePictureOnlyConstraintFlag(bool b) { m_onePictureOnlyConstraintFlag = b; } - - bool getLowerBitRateConstraintFlag() const { return m_lowerBitRateConstraintFlag; } - void setLowerBitRateConstraintFlag(bool b) { m_lowerBitRateConstraintFlag = b; } -#endif - -#if JVET_M0101_HLS ConstraintInfo* getConstraintInfo() { return &m_constraintInfo; } const ConstraintInfo* getConstraintInfo() const { return &m_constraintInfo; } @@ -413,47 +454,10 @@ public: Level::Name getSubLayerLevelIdc(int i) const { return m_subLayerLevelIdc[i]; } void setSubLayerLevelIdc(int i, Level::Name x) { m_subLayerLevelIdc[i] = x; } -#endif }; -#if !JVET_M0101_HLS -class PTL -{ - ProfileTierLevel m_generalPTL; - ProfileTierLevel m_subLayerPTL [MAX_TLAYER-1]; // max. 
value of max_sub_layers_minus1 is MAX_TLAYER-1 (= 6) - bool m_subLayerProfilePresentFlag [MAX_TLAYER-1]; - bool m_subLayerLevelPresentFlag [MAX_TLAYER-1]; - -public: - PTL(); - bool getSubLayerProfilePresentFlag(int i) const { return m_subLayerProfilePresentFlag[i]; } - void setSubLayerProfilePresentFlag(int i, bool x) { m_subLayerProfilePresentFlag[i] = x; } - - bool getSubLayerLevelPresentFlag(int i) const { return m_subLayerLevelPresentFlag[i]; } - void setSubLayerLevelPresentFlag(int i, bool x) { m_subLayerLevelPresentFlag[i] = x; } - - ProfileTierLevel* getGeneralPTL() { return &m_generalPTL; } - const ProfileTierLevel* getGeneralPTL() const { return &m_generalPTL; } - ProfileTierLevel* getSubLayerPTL(int i) { return &m_subLayerPTL[i]; } - const ProfileTierLevel* getSubLayerPTL(int i) const { return &m_subLayerPTL[i]; } -}; -#endif - -struct HrdSubLayerInfo -{ - bool fixedPicRateFlag; - bool fixedPicRateWithinCvsFlag; - uint32_t picDurationInTcMinus1; - bool lowDelayHrdFlag; - uint32_t cpbCntMinus1; - uint32_t bitRateValueMinus1[MAX_CPB_CNT][2]; - uint32_t cpbSizeValue [MAX_CPB_CNT][2]; - uint32_t ducpbSizeValue [MAX_CPB_CNT][2]; - bool cbrFlag [MAX_CPB_CNT][2]; - uint32_t duBitRateValue [MAX_CPB_CNT][2]; -}; class SliceReshapeInfo { @@ -465,19 +469,65 @@ public: uint32_t reshaperModelMaxBinIdx; int reshaperModelBinCWDelta[PIC_CODE_CW_BINS]; int maxNbitsNeededDeltaCW; + int chrResScalingOffset; void setUseSliceReshaper(bool b) { sliceReshaperEnableFlag = b; } bool getUseSliceReshaper() const { return sliceReshaperEnableFlag; } void setSliceReshapeModelPresentFlag(bool b) { sliceReshaperModelPresentFlag = b; } bool getSliceReshapeModelPresentFlag() const { return sliceReshaperModelPresentFlag; } void setSliceReshapeChromaAdj(unsigned adj) { enableChromaAdj = adj; } unsigned getSliceReshapeChromaAdj() const { return enableChromaAdj; } + + bool operator==( const SliceReshapeInfo& other ) + { + if( sliceReshaperEnableFlag != other.sliceReshaperEnableFlag ) + { + 
return false; + } + if( sliceReshaperModelPresentFlag != other.sliceReshaperModelPresentFlag ) + { + return false; + } + if( enableChromaAdj != other.enableChromaAdj ) + { + return false; + } + if( reshaperModelMinBinIdx != other.reshaperModelMinBinIdx ) + { + return false; + } + if( reshaperModelMaxBinIdx != other.reshaperModelMaxBinIdx ) + { + return false; + } + if( maxNbitsNeededDeltaCW != other.maxNbitsNeededDeltaCW ) + { + return false; + } + if (chrResScalingOffset != other.chrResScalingOffset) + { + return false; + } + if( memcmp( reshaperModelBinCWDelta, other.reshaperModelBinCWDelta, sizeof( reshaperModelBinCWDelta ) ) ) + { + return false; + } + + return true; + } + + bool operator!=( const SliceReshapeInfo& other ) + { + return !( *this == other ); + } }; struct ReshapeCW { std::vector<uint32_t> binCW; + int updateCtrl; + int adpOption; + uint32_t initialCW; int rspPicSize; - int rspIntraPeriod; int rspFps; int rspBaseQP; int rspTid; @@ -485,235 +535,234 @@ struct ReshapeCW int rspFpsToIp; }; -class HRD +struct ChromaQpAdj { -private: - bool m_nalHrdParametersPresentFlag; - bool m_vclHrdParametersPresentFlag; - bool m_subPicCpbParamsPresentFlag; - uint32_t m_tickDivisorMinus2; - uint32_t m_duCpbRemovalDelayLengthMinus1; - bool m_subPicCpbParamsInPicTimingSEIFlag; - uint32_t m_dpbOutputDelayDuLengthMinus1; - uint32_t m_bitRateScale; - uint32_t m_cpbSizeScale; - uint32_t m_ducpbSizeScale; - uint32_t m_initialCpbRemovalDelayLengthMinus1; - uint32_t m_cpbRemovalDelayLengthMinus1; - uint32_t m_dpbOutputDelayLengthMinus1; - HrdSubLayerInfo m_HRD[MAX_TLAYER]; - -public: - HRD() - :m_nalHrdParametersPresentFlag (0) - ,m_vclHrdParametersPresentFlag (0) - ,m_subPicCpbParamsPresentFlag (false) - ,m_tickDivisorMinus2 (0) - ,m_duCpbRemovalDelayLengthMinus1 (0) - ,m_subPicCpbParamsInPicTimingSEIFlag (false) - ,m_dpbOutputDelayDuLengthMinus1 (0) - ,m_bitRateScale (0) - ,m_cpbSizeScale (0) - ,m_initialCpbRemovalDelayLengthMinus1(23) - ,m_cpbRemovalDelayLengthMinus1 
(23) - ,m_dpbOutputDelayLengthMinus1 (23) - {} - - virtual ~HRD() {} - - void setNalHrdParametersPresentFlag( bool flag ) { m_nalHrdParametersPresentFlag = flag; } - bool getNalHrdParametersPresentFlag( ) const { return m_nalHrdParametersPresentFlag; } - - void setVclHrdParametersPresentFlag( bool flag ) { m_vclHrdParametersPresentFlag = flag; } - bool getVclHrdParametersPresentFlag( ) const { return m_vclHrdParametersPresentFlag; } - - void setSubPicCpbParamsPresentFlag( bool flag ) { m_subPicCpbParamsPresentFlag = flag; } - bool getSubPicCpbParamsPresentFlag( ) const { return m_subPicCpbParamsPresentFlag; } - - void setTickDivisorMinus2( uint32_t value ) { m_tickDivisorMinus2 = value; } - uint32_t getTickDivisorMinus2( ) const { return m_tickDivisorMinus2; } - - void setDuCpbRemovalDelayLengthMinus1( uint32_t value ) { m_duCpbRemovalDelayLengthMinus1 = value; } - uint32_t getDuCpbRemovalDelayLengthMinus1( ) const { return m_duCpbRemovalDelayLengthMinus1; } - - void setSubPicCpbParamsInPicTimingSEIFlag( bool flag) { m_subPicCpbParamsInPicTimingSEIFlag = flag; } - bool getSubPicCpbParamsInPicTimingSEIFlag( ) const { return m_subPicCpbParamsInPicTimingSEIFlag; } - - void setDpbOutputDelayDuLengthMinus1(uint32_t value ) { m_dpbOutputDelayDuLengthMinus1 = value; } - uint32_t getDpbOutputDelayDuLengthMinus1( ) const { return m_dpbOutputDelayDuLengthMinus1; } - - void setBitRateScale( uint32_t value ) { m_bitRateScale = value; } - uint32_t getBitRateScale( ) const { return m_bitRateScale; } - - void setCpbSizeScale( uint32_t value ) { m_cpbSizeScale = value; } - uint32_t getCpbSizeScale( ) const { return m_cpbSizeScale; } - void setDuCpbSizeScale( uint32_t value ) { m_ducpbSizeScale = value; } - uint32_t getDuCpbSizeScale( ) const { return m_ducpbSizeScale; } - - void setInitialCpbRemovalDelayLengthMinus1( uint32_t value ) { m_initialCpbRemovalDelayLengthMinus1 = value; } - uint32_t getInitialCpbRemovalDelayLengthMinus1( ) const { return 
m_initialCpbRemovalDelayLengthMinus1; } - - void setCpbRemovalDelayLengthMinus1( uint32_t value ) { m_cpbRemovalDelayLengthMinus1 = value; } - uint32_t getCpbRemovalDelayLengthMinus1( ) const { return m_cpbRemovalDelayLengthMinus1; } - - void setDpbOutputDelayLengthMinus1( uint32_t value ) { m_dpbOutputDelayLengthMinus1 = value; } - uint32_t getDpbOutputDelayLengthMinus1( ) const { return m_dpbOutputDelayLengthMinus1; } - - void setFixedPicRateFlag( int layer, bool flag ) { m_HRD[layer].fixedPicRateFlag = flag; } - bool getFixedPicRateFlag( int layer ) const { return m_HRD[layer].fixedPicRateFlag; } - - void setFixedPicRateWithinCvsFlag( int layer, bool flag ) { m_HRD[layer].fixedPicRateWithinCvsFlag = flag; } - bool getFixedPicRateWithinCvsFlag( int layer ) const { return m_HRD[layer].fixedPicRateWithinCvsFlag; } - - void setPicDurationInTcMinus1( int layer, uint32_t value ) { m_HRD[layer].picDurationInTcMinus1 = value; } - uint32_t getPicDurationInTcMinus1( int layer ) const { return m_HRD[layer].picDurationInTcMinus1; } + union + { + struct { + int CbOffset; + int CrOffset; + int JointCbCrOffset; + } comp; + int offset[3]; + } u; +}; +struct ChromaQpMappingTableParams { + int m_qpBdOffset; + bool m_sameCQPTableForAllChromaFlag; + int m_numQpTables; + int m_qpTableStartMinus26[MAX_NUM_CQP_MAPPING_TABLES]; + int m_numPtsInCQPTableMinus1[MAX_NUM_CQP_MAPPING_TABLES]; + std::vector<int> m_deltaQpInValMinus1[MAX_NUM_CQP_MAPPING_TABLES]; + std::vector<int> m_deltaQpOutVal[MAX_NUM_CQP_MAPPING_TABLES]; + + ChromaQpMappingTableParams() + { + m_qpBdOffset = 12; + m_sameCQPTableForAllChromaFlag = true; + m_numQpTables = 1; + m_numPtsInCQPTableMinus1[0] = 0; + m_qpTableStartMinus26[0] = 0; + m_deltaQpInValMinus1[0] = { 0 }; + m_deltaQpOutVal[0] = { 0 }; + } - void setLowDelayHrdFlag( int layer, bool flag ) { m_HRD[layer].lowDelayHrdFlag = flag; } - bool getLowDelayHrdFlag( int layer ) const { return m_HRD[layer].lowDelayHrdFlag; } + void setSameCQPTableForAllChromaFlag(bool 
b) { m_sameCQPTableForAllChromaFlag = b; } + bool getSameCQPTableForAllChromaFlag() const { return m_sameCQPTableForAllChromaFlag; } + void setNumQpTables(int n) { m_numQpTables = n; } + int getNumQpTables() const { return m_numQpTables; } + void setQpTableStartMinus26(int tableIdx, int n) { m_qpTableStartMinus26[tableIdx] = n; } + int getQpTableStartMinus26(int tableIdx) const { return m_qpTableStartMinus26[tableIdx]; } + void setNumPtsInCQPTableMinus1(int tableIdx, int n) { m_numPtsInCQPTableMinus1[tableIdx] = n; } + int getNumPtsInCQPTableMinus1(int tableIdx) const { return m_numPtsInCQPTableMinus1[tableIdx]; } + void setDeltaQpInValMinus1(int tableIdx, std::vector<int> &inVals) { m_deltaQpInValMinus1[tableIdx] = inVals; } + void setDeltaQpInValMinus1(int tableIdx, int idx, int n) { m_deltaQpInValMinus1[tableIdx][idx] = n; } + int getDeltaQpInValMinus1(int tableIdx, int idx) const { return m_deltaQpInValMinus1[tableIdx][idx]; } + void setDeltaQpOutVal(int tableIdx, std::vector<int> &outVals) { m_deltaQpOutVal[tableIdx] = outVals; } + void setDeltaQpOutVal(int tableIdx, int idx, int n) { m_deltaQpOutVal[tableIdx][idx] = n; } + int getDeltaQpOutVal(int tableIdx, int idx) const { return m_deltaQpOutVal[tableIdx][idx]; } +}; +struct ChromaQpMappingTable : ChromaQpMappingTableParams +{ + std::map<int, int> m_chromaQpMappingTables[MAX_NUM_CQP_MAPPING_TABLES]; - void setCpbCntMinus1( int layer, uint32_t value ) { m_HRD[layer].cpbCntMinus1 = value; } - uint32_t getCpbCntMinus1( int layer ) const { return m_HRD[layer].cpbCntMinus1; } + int getMappedChromaQpValue(ComponentID compID, const int qpVal) const { return m_chromaQpMappingTables[m_sameCQPTableForAllChromaFlag ? 
0 : (int)compID - 1].at(qpVal); } + void derivedChromaQPMappingTables(); + void setParams(const ChromaQpMappingTableParams ¶ms, const int qpBdOffset); +}; - void setBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl] = value; } - uint32_t getBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl]; } +class SliceMap +{ +private: + uint32_t m_sliceID; //!< slice identifier (slice index for rectangular slices, slice address for raser-scan slices) + uint32_t m_numTilesInSlice; //!< number of tiles in slice (raster-scan slices only) + uint32_t m_numCtuInSlice; //!< number of CTUs in the slice + std::vector<uint32_t> m_ctuAddrInSlice; //!< raster-scan addresses of all the CTUs in the slice - void setCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl] = value; } - uint32_t getCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl]; } - void setDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl] = value; } - uint32_t getDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl]; } - void setDuBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl] = value; } - uint32_t getDuBitRateValueMinus1(int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl]; } - void setCbrFlag( int layer, int cpbcnt, int nalOrVcl, bool value ) { m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl] = value; } - bool getCbrFlag( int layer, int cpbcnt, int nalOrVcl ) const { return m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl]; } +public: + SliceMap(); + virtual ~SliceMap(); + + void setSliceID( uint32_t u ) { 
m_sliceID = u; } + uint32_t getSliceID() const { return m_sliceID; } + void setNumTilesInSlice( uint32_t u ) { m_numTilesInSlice = u; } + uint32_t getNumTilesInSlice() const { return m_numTilesInSlice; } + void setNumCtuInSlice( uint32_t u ) { m_numCtuInSlice = u; } + uint32_t getNumCtuInSlice() const { return m_numCtuInSlice; } + std::vector<uint32_t> getCtuAddrList( ) const { return m_ctuAddrInSlice; } + uint32_t getCtuAddrInSlice( int idx ) const { CHECK(idx >= m_ctuAddrInSlice.size(), "CTU index exceeds number of CTUs in slice."); return m_ctuAddrInSlice[idx]; } + void pushToCtuAddrInSlice( uint32_t u ) { m_ctuAddrInSlice.push_back(u); m_numCtuInSlice++;} + + void initSliceMap() + { + m_sliceID = 0; + m_numTilesInSlice = 0; + m_numCtuInSlice = 0; + m_ctuAddrInSlice.clear(); + } - bool getCpbDpbDelaysPresentFlag( ) const { return getNalHrdParametersPresentFlag() || getVclHrdParametersPresentFlag(); } + void addCtusToSlice( uint32_t startX, uint32_t stopX, uint32_t startY, uint32_t stopY, uint32_t picWidthInCtbsY ) + { + CHECK( startX >= stopX || startY >= stopY, "Invalid slice definition"); + for( uint32_t ctbY = startY; ctbY < stopY; ctbY++ ) + { + for( uint32_t ctbX = startX; ctbX < stopX; ctbX++ ) + { + m_ctuAddrInSlice.push_back( ctbY * picWidthInCtbsY + ctbX ); + m_numCtuInSlice++; + } + } + } }; -class TimingInfo +class RectSlice { - bool m_timingInfoPresentFlag; - uint32_t m_numUnitsInTick; - uint32_t m_timeScale; - bool m_pocProportionalToTimingFlag; - int m_numTicksPocDiffOneMinus1; +private: + uint32_t m_tileIdx; //!< tile index corresponding to the first CTU in the slice + uint32_t m_sliceWidthInTiles; //!< slice width in units of tiles + uint32_t m_sliceHeightInTiles; //!< slice height in units of tiles + uint32_t m_numSlicesInTile; //!< number of slices in current tile for the special case of multiple slices inside a single tile + uint32_t m_sliceHeightInCtu; //!< slice height in units of CTUs for the special case of multiple slices inside a single 
tile + public: - TimingInfo() - : m_timingInfoPresentFlag (false) - , m_numUnitsInTick (1001) - , m_timeScale (60000) - , m_pocProportionalToTimingFlag(false) - , m_numTicksPocDiffOneMinus1 (0) - {} + RectSlice(); + virtual ~RectSlice(); + + void setSliceWidthInTiles( uint32_t u ) { m_sliceWidthInTiles = u; } + uint32_t getSliceWidthInTiles( ) const { return m_sliceWidthInTiles; } + void setSliceHeightInTiles( uint32_t u ) { m_sliceHeightInTiles = u; } + uint32_t getSliceHeightInTiles( ) const { return m_sliceHeightInTiles; } + void setNumSlicesInTile( uint32_t u ) { m_numSlicesInTile = u; } + uint32_t getNumSlicesInTile( ) const { return m_numSlicesInTile; } + void setSliceHeightInCtu( uint32_t u ) { m_sliceHeightInCtu = u; } + uint32_t getSliceHeightInCtu( ) const { return m_sliceHeightInCtu; } + void setTileIdx( uint32_t u ) { m_tileIdx = u; } + uint32_t getTileIdx( ) const { return m_tileIdx; } - void setTimingInfoPresentFlag( bool flag ) { m_timingInfoPresentFlag = flag; } - bool getTimingInfoPresentFlag( ) const { return m_timingInfoPresentFlag; } +}; - void setNumUnitsInTick( uint32_t value ) { m_numUnitsInTick = value; } - uint32_t getNumUnitsInTick( ) const { return m_numUnitsInTick; } +class DPS +{ +private: + int m_decodingParameterSetId; + int m_maxSubLayersMinus1; + std::vector<ProfileTierLevel> m_profileTierLevel; - void setTimeScale( uint32_t value ) { m_timeScale = value; } - uint32_t getTimeScale( ) const { return m_timeScale; } +public: + DPS() + : m_decodingParameterSetId(-1) + , m_maxSubLayersMinus1 (0) + {}; - void setPocProportionalToTimingFlag(bool x) { m_pocProportionalToTimingFlag = x; } - bool getPocProportionalToTimingFlag( ) const { return m_pocProportionalToTimingFlag; } + virtual ~DPS() {}; - void setNumTicksPocDiffOneMinus1(int x) { m_numTicksPocDiffOneMinus1 = x; } - int getNumTicksPocDiffOneMinus1( ) const { return m_numTicksPocDiffOneMinus1; } -}; + int getDecodingParameterSetId() const { return m_decodingParameterSetId; } + void 
setDecodingParameterSetId(int val) { m_decodingParameterSetId = val; } + int getMaxSubLayersMinus1() const { return m_maxSubLayersMinus1; } + void setMaxSubLayersMinus1(int val) { m_maxSubLayersMinus1 = val; } -struct ChromaQpAdj -{ - union - { - struct { - int CbOffset; - int CrOffset; - } comp; - int offset[2]; /* two chroma components */ - } u; + size_t getNumPTLs() const { return m_profileTierLevel.size(); } + void setProfileTierLevel(const std::vector<ProfileTierLevel> &val) { m_profileTierLevel = val; } + const ProfileTierLevel& getProfileTierLevel(int idx) const { return m_profileTierLevel[idx]; } }; -#if HEVC_VPS + + class VPS { private: int m_VPSId; - uint32_t m_uiMaxTLayers; - uint32_t m_uiMaxLayers; - bool m_bTemporalIdNestingFlag; - - uint32_t m_numReorderPics[MAX_TLAYER]; - uint32_t m_uiMaxDecPicBuffering[MAX_TLAYER]; - uint32_t m_uiMaxLatencyIncrease[MAX_TLAYER]; // Really max latency increase plus 1 (value 0 expresses no limit) - - uint32_t m_numHrdParameters; - uint32_t m_maxNuhReservedZeroLayerId; - std::vector<HRD> m_hrdParameters; - std::vector<uint32_t> m_hrdOpSetIdx; - std::vector<bool> m_cprmsPresentFlag; - uint32_t m_numOpSets; - bool m_layerIdIncludedFlag[MAX_VPS_OP_SETS_PLUS1][MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1]; - - PTL m_pcPTL; - TimingInfo m_timingInfo; + uint32_t m_uiMaxLayers; + + uint32_t m_vpsMaxSubLayers; + uint32_t m_vpsLayerId[MAX_VPS_LAYERS]; + bool m_vpsAllLayersSameNumSubLayersFlag; + bool m_vpsAllIndependentLayersFlag; + bool m_vpsIndependentLayerFlag[MAX_VPS_LAYERS]; + bool m_vpsDirectRefLayerFlag[MAX_VPS_LAYERS][MAX_VPS_LAYERS]; + bool m_vpsEachLayerIsAnOlsFlag; + uint32_t m_vpsOlsModeIdc; + uint32_t m_vpsNumOutputLayerSets; + bool m_vpsOlsOutputLayerFlag[MAX_NUM_OLSS][MAX_VPS_LAYERS]; + uint32_t m_directRefLayerIdx[MAX_VPS_LAYERS][MAX_VPS_LAYERS]; + uint32_t m_generalLayerIdx[MAX_VPS_LAYERS]; + + // stores index ( ilrp_idx within 0 .. 
NumDirectRefLayers ) of the dependent reference layers + uint32_t m_interLayerRefIdx[MAX_VPS_LAYERS][MAX_VPS_LAYERS]; + bool m_vpsExtensionFlag; public: VPS(); virtual ~VPS(); - void createHrdParamBuffer() - { - m_hrdParameters .resize(getNumHrdParameters()); - m_hrdOpSetIdx .resize(getNumHrdParameters()); - m_cprmsPresentFlag.resize(getNumHrdParameters()); - } - - HRD* getHrdParameters( uint32_t i ) { return &m_hrdParameters[ i ]; } - const HRD* getHrdParameters( uint32_t i ) const { return &m_hrdParameters[ i ]; } - uint32_t getHrdOpSetIdx( uint32_t i ) const { return m_hrdOpSetIdx[ i ]; } - void setHrdOpSetIdx( uint32_t val, uint32_t i ) { m_hrdOpSetIdx[ i ] = val; } - bool getCprmsPresentFlag( uint32_t i ) const { return m_cprmsPresentFlag[ i ]; } - void setCprmsPresentFlag( bool val, uint32_t i ) { m_cprmsPresentFlag[ i ] = val; } - int getVPSId() const { return m_VPSId; } void setVPSId(int i) { m_VPSId = i; } - uint32_t getMaxTLayers() const { return m_uiMaxTLayers; } - void setMaxTLayers(uint32_t t) { m_uiMaxTLayers = t; } + uint32_t getMaxLayers() const { return m_uiMaxLayers; } + void setMaxLayers(uint32_t l) { m_uiMaxLayers = l; } + + uint32_t getMaxSubLayers() const { return m_vpsMaxSubLayers; } + void setMaxSubLayers(uint32_t value) { m_vpsMaxSubLayers = value; } + bool getAllLayersSameNumSublayersFlag() const { return m_vpsAllLayersSameNumSubLayersFlag; } + void setAllLayersSameNumSublayersFlag(bool t) { m_vpsAllLayersSameNumSubLayersFlag = t; } + + uint32_t getLayerId(uint32_t layerIdx) const { return m_vpsLayerId[layerIdx]; } + void setLayerId(uint32_t layerIdx, uint32_t layerId) { m_vpsLayerId[layerIdx] = layerId; } + + bool getAllIndependentLayersFlag() const { return m_vpsAllIndependentLayersFlag; } + void setAllIndependentLayersFlag(bool t) { m_vpsAllIndependentLayersFlag = t; } + + bool getIndependentLayerFlag(uint32_t layerIdx) const { return m_vpsIndependentLayerFlag[layerIdx]; } + void setIndependentLayerFlag(uint32_t layerIdx, bool t) { 
m_vpsIndependentLayerFlag[layerIdx] = t; } - uint32_t getMaxLayers() const { return m_uiMaxLayers; } - void setMaxLayers(uint32_t l) { m_uiMaxLayers = l; } + bool getDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx) const { return m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx]; } + void setDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx, bool t) { m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx] = t; } - bool getTemporalNestingFlag() const { return m_bTemporalIdNestingFlag; } - void setTemporalNestingFlag(bool t) { m_bTemporalIdNestingFlag = t; } + uint32_t getDirectRefLayerIdx( uint32_t layerIdx, uint32_t refLayerIdc ) const { return m_directRefLayerIdx[layerIdx][refLayerIdc]; } + void setDirectRefLayerIdx( uint32_t layerIdx, uint32_t refLayerIdc, uint32_t refLayerIdx ) { m_directRefLayerIdx[layerIdx][refLayerIdc] = refLayerIdx; } - void setNumReorderPics(uint32_t v, uint32_t tLayer) { m_numReorderPics[tLayer] = v; } - uint32_t getNumReorderPics(uint32_t tLayer) const { return m_numReorderPics[tLayer]; } + uint32_t getInterLayerRefIdc( uint32_t layerIdx, uint32_t refLayerIdx ) const { return m_interLayerRefIdx[layerIdx][refLayerIdx]; } + void setInterLayerRefIdc( uint32_t layerIdx, uint32_t refLayerIdx, uint32_t refLayerIdc ) { m_interLayerRefIdx[layerIdx][refLayerIdx] = refLayerIdc; } - void setMaxDecPicBuffering(uint32_t v, uint32_t tLayer) { CHECK(tLayer >= MAX_TLAYER, "Invalid T-layer"); m_uiMaxDecPicBuffering[tLayer] = v; } - uint32_t getMaxDecPicBuffering(uint32_t tLayer) const { return m_uiMaxDecPicBuffering[tLayer]; } + uint32_t getGeneralLayerIdx(uint32_t layerId) const { return m_generalLayerIdx[layerId]; } + void setGeneralLayerIdx(uint32_t layerId, uint32_t layerIdc) { m_generalLayerIdx[layerId] = layerIdc; } - void setMaxLatencyIncrease(uint32_t v, uint32_t tLayer) { m_uiMaxLatencyIncrease[tLayer] = v; } - uint32_t getMaxLatencyIncrease(uint32_t tLayer) const { return m_uiMaxLatencyIncrease[tLayer]; } + bool 
getEachLayerIsAnOlsFlag() const { return m_vpsEachLayerIsAnOlsFlag; } + void setEachLayerIsAnOlsFlag(bool t) { m_vpsEachLayerIsAnOlsFlag = t; } - uint32_t getNumHrdParameters() const { return m_numHrdParameters; } - void setNumHrdParameters(uint32_t v) { m_numHrdParameters = v; } + uint32_t getOlsModeIdc() const { return m_vpsOlsModeIdc; } + void setOlsModeIdc(uint32_t t) { m_vpsOlsModeIdc = t; } - uint32_t getMaxNuhReservedZeroLayerId() const { return m_maxNuhReservedZeroLayerId; } - void setMaxNuhReservedZeroLayerId(uint32_t v) { m_maxNuhReservedZeroLayerId = v; } + uint32_t getNumOutputLayerSets() const { return m_vpsNumOutputLayerSets; } + void setNumOutputLayerSets(uint8_t t) { m_vpsNumOutputLayerSets = t; } - uint32_t getMaxOpSets() const { return m_numOpSets; } - void setMaxOpSets(uint32_t v) { m_numOpSets = v; } - bool getLayerIdIncludedFlag(uint32_t opsIdx, uint32_t id) const { return m_layerIdIncludedFlag[opsIdx][id]; } - void setLayerIdIncludedFlag(bool v, uint32_t opsIdx, uint32_t id) { m_layerIdIncludedFlag[opsIdx][id] = v; } + bool getOlsOutputLayerFlag(uint32_t ols, uint32_t layer) const { return m_vpsOlsOutputLayerFlag[ols][layer]; } + void setOlsOutputLayerFlag(uint32_t ols, uint32_t layer, bool t) { m_vpsOlsOutputLayerFlag[ols][layer] = t; } - PTL* getPTL() { return &m_pcPTL; } - const PTL* getPTL() const { return &m_pcPTL; } - TimingInfo* getTimingInfo() { return &m_timingInfo; } - const TimingInfo* getTimingInfo() const { return &m_timingInfo; } + bool getVPSExtensionFlag() const { return m_vpsExtensionFlag; } + void setVPSExtensionFlag(bool t) { m_vpsExtensionFlag = t; } }; -#endif class Window { @@ -757,79 +806,50 @@ class VUI { private: bool m_aspectRatioInfoPresentFlag; + bool m_aspectRatioConstantFlag; int m_aspectRatioIdc; int m_sarWidth; int m_sarHeight; - bool m_overscanInfoPresentFlag; - bool m_overscanAppropriateFlag; - bool m_videoSignalTypePresentFlag; - int m_videoFormat; - bool m_videoFullRangeFlag; bool 
m_colourDescriptionPresentFlag; int m_colourPrimaries; int m_transferCharacteristics; int m_matrixCoefficients; + bool m_fieldSeqFlag; bool m_chromaLocInfoPresentFlag; int m_chromaSampleLocTypeTopField; int m_chromaSampleLocTypeBottomField; - bool m_neutralChromaIndicationFlag; - bool m_fieldSeqFlag; - Window m_defaultDisplayWindow; - bool m_frameFieldInfoPresentFlag; - bool m_hrdParametersPresentFlag; - bool m_bitstreamRestrictionFlag; -#if HEVC_TILES_WPP - bool m_tilesFixedStructureFlag; -#endif - bool m_motionVectorsOverPicBoundariesFlag; - bool m_restrictedRefPicListsFlag; - int m_minSpatialSegmentationIdc; - int m_maxBytesPerPicDenom; - int m_maxBitsPerMinCuDenom; - int m_log2MaxMvLengthHorizontal; - int m_log2MaxMvLengthVertical; - HRD m_hrdParameters; - TimingInfo m_timingInfo; + int m_chromaSampleLocType; + bool m_overscanInfoPresentFlag; + bool m_overscanAppropriateFlag; + bool m_videoFullRangeFlag; public: VUI() : m_aspectRatioInfoPresentFlag (false) //TODO: This initialiser list contains magic numbers + , m_aspectRatioConstantFlag (true) , m_aspectRatioIdc (0) , m_sarWidth (0) , m_sarHeight (0) - , m_overscanInfoPresentFlag (false) - , m_overscanAppropriateFlag (false) - , m_videoSignalTypePresentFlag (false) - , m_videoFormat (5) - , m_videoFullRangeFlag (false) , m_colourDescriptionPresentFlag (false) , m_colourPrimaries (2) , m_transferCharacteristics (2) , m_matrixCoefficients (2) + , m_fieldSeqFlag (false) , m_chromaLocInfoPresentFlag (false) , m_chromaSampleLocTypeTopField (0) , m_chromaSampleLocTypeBottomField (0) - , m_neutralChromaIndicationFlag (false) - , m_fieldSeqFlag (false) - , m_frameFieldInfoPresentFlag (false) - , m_hrdParametersPresentFlag (false) - , m_bitstreamRestrictionFlag (false) -#if HEVC_TILES_WPP - , m_tilesFixedStructureFlag (false) -#endif - , m_motionVectorsOverPicBoundariesFlag(true) - , m_restrictedRefPicListsFlag (1) - , m_minSpatialSegmentationIdc (0) - , m_maxBytesPerPicDenom (2) - , m_maxBitsPerMinCuDenom (1) - , 
m_log2MaxMvLengthHorizontal (15) - , m_log2MaxMvLengthVertical (15) + , m_chromaSampleLocType (0) + , m_overscanInfoPresentFlag (false) + , m_overscanAppropriateFlag (false) + , m_videoFullRangeFlag (false) {} virtual ~VUI() {} bool getAspectRatioInfoPresentFlag() const { return m_aspectRatioInfoPresentFlag; } void setAspectRatioInfoPresentFlag(bool i) { m_aspectRatioInfoPresentFlag = i; } + bool getAspectRatioConstantFlag() const { return m_aspectRatioConstantFlag; } + void setAspectRatioConstantFlag(bool b) { m_aspectRatioConstantFlag = b; } int getAspectRatioIdc() const { return m_aspectRatioIdc; } void setAspectRatioIdc(int i) { m_aspectRatioIdc = i; } @@ -840,21 +860,6 @@ public: int getSarHeight() const { return m_sarHeight; } void setSarHeight(int i) { m_sarHeight = i; } - bool getOverscanInfoPresentFlag() const { return m_overscanInfoPresentFlag; } - void setOverscanInfoPresentFlag(bool i) { m_overscanInfoPresentFlag = i; } - - bool getOverscanAppropriateFlag() const { return m_overscanAppropriateFlag; } - void setOverscanAppropriateFlag(bool i) { m_overscanAppropriateFlag = i; } - - bool getVideoSignalTypePresentFlag() const { return m_videoSignalTypePresentFlag; } - void setVideoSignalTypePresentFlag(bool i) { m_videoSignalTypePresentFlag = i; } - - int getVideoFormat() const { return m_videoFormat; } - void setVideoFormat(int i) { m_videoFormat = i; } - - bool getVideoFullRangeFlag() const { return m_videoFullRangeFlag; } - void setVideoFullRangeFlag(bool i) { m_videoFullRangeFlag = i; } - bool getColourDescriptionPresentFlag() const { return m_colourDescriptionPresentFlag; } void setColourDescriptionPresentFlag(bool i) { m_colourDescriptionPresentFlag = i; } @@ -867,6 +872,9 @@ public: int getMatrixCoefficients() const { return m_matrixCoefficients; } void setMatrixCoefficients(int i) { m_matrixCoefficients = i; } + bool getFieldSeqFlag() const { return m_fieldSeqFlag; } + void setFieldSeqFlag(bool i) { m_fieldSeqFlag = i; } + bool 
getChromaLocInfoPresentFlag() const { return m_chromaLocInfoPresentFlag; } void setChromaLocInfoPresentFlag(bool i) { m_chromaLocInfoPresentFlag = i; } @@ -876,56 +884,18 @@ public: int getChromaSampleLocTypeBottomField() const { return m_chromaSampleLocTypeBottomField; } void setChromaSampleLocTypeBottomField(int i) { m_chromaSampleLocTypeBottomField = i; } - bool getNeutralChromaIndicationFlag() const { return m_neutralChromaIndicationFlag; } - void setNeutralChromaIndicationFlag(bool i) { m_neutralChromaIndicationFlag = i; } - - bool getFieldSeqFlag() const { return m_fieldSeqFlag; } - void setFieldSeqFlag(bool i) { m_fieldSeqFlag = i; } - - bool getFrameFieldInfoPresentFlag() const { return m_frameFieldInfoPresentFlag; } - void setFrameFieldInfoPresentFlag(bool i) { m_frameFieldInfoPresentFlag = i; } - - Window& getDefaultDisplayWindow() { return m_defaultDisplayWindow; } - const Window& getDefaultDisplayWindow() const { return m_defaultDisplayWindow; } - void setDefaultDisplayWindow(Window& defaultDisplayWindow ) { m_defaultDisplayWindow = defaultDisplayWindow; } - - bool getHrdParametersPresentFlag() const { return m_hrdParametersPresentFlag; } - void setHrdParametersPresentFlag(bool i) { m_hrdParametersPresentFlag = i; } - - bool getBitstreamRestrictionFlag() const { return m_bitstreamRestrictionFlag; } - void setBitstreamRestrictionFlag(bool i) { m_bitstreamRestrictionFlag = i; } - -#if HEVC_TILES_WPP - bool getTilesFixedStructureFlag() const { return m_tilesFixedStructureFlag; } - void setTilesFixedStructureFlag(bool i) { m_tilesFixedStructureFlag = i; } -#endif - - bool getMotionVectorsOverPicBoundariesFlag() const { return m_motionVectorsOverPicBoundariesFlag; } - void setMotionVectorsOverPicBoundariesFlag(bool i) { m_motionVectorsOverPicBoundariesFlag = i; } - - bool getRestrictedRefPicListsFlag() const { return m_restrictedRefPicListsFlag; } - void setRestrictedRefPicListsFlag(bool b) { m_restrictedRefPicListsFlag = b; } - - int 
getMinSpatialSegmentationIdc() const { return m_minSpatialSegmentationIdc; } - void setMinSpatialSegmentationIdc(int i) { m_minSpatialSegmentationIdc = i; } - - int getMaxBytesPerPicDenom() const { return m_maxBytesPerPicDenom; } - void setMaxBytesPerPicDenom(int i) { m_maxBytesPerPicDenom = i; } + int getChromaSampleLocType() const { return m_chromaSampleLocType; } + void setChromaSampleLocType(int i) { m_chromaSampleLocType = i; } - int getMaxBitsPerMinCuDenom() const { return m_maxBitsPerMinCuDenom; } - void setMaxBitsPerMinCuDenom(int i) { m_maxBitsPerMinCuDenom = i; } - - int getLog2MaxMvLengthHorizontal() const { return m_log2MaxMvLengthHorizontal; } - void setLog2MaxMvLengthHorizontal(int i) { m_log2MaxMvLengthHorizontal = i; } + bool getOverscanInfoPresentFlag() const { return m_overscanInfoPresentFlag; } + void setOverscanInfoPresentFlag(bool i) { m_overscanInfoPresentFlag = i; } - int getLog2MaxMvLengthVertical() const { return m_log2MaxMvLengthVertical; } - void setLog2MaxMvLengthVertical(int i) { m_log2MaxMvLengthVertical = i; } + bool getOverscanAppropriateFlag() const { return m_overscanAppropriateFlag; } + void setOverscanAppropriateFlag(bool i) { m_overscanAppropriateFlag = i; } - HRD* getHrdParameters() { return &m_hrdParameters; } - const HRD* getHrdParameters() const { return &m_hrdParameters; } + bool getVideoFullRangeFlag() const { return m_videoFullRangeFlag; } + void setVideoFullRangeFlag(bool i) { m_videoFullRangeFlag = i; } - TimingInfo* getTimingInfo() { return &m_timingInfo; } - const TimingInfo* getTimingInfo() const { return &m_timingInfo; } }; /// SPS RExt class @@ -989,105 +959,102 @@ class SPS { private: int m_SPSId; -#if !JVET_M0101_HLS - bool m_bIntraOnlyConstraintFlag; - uint32_t m_maxBitDepthConstraintIdc; - uint32_t m_maxChromaFormatConstraintIdc; - bool m_bFrameConstraintFlag; - bool m_bNoQtbttDualTreeIntraConstraintFlag; - bool m_bNoSaoConstraintFlag; - bool m_bNoAlfConstraintFlag; - bool m_bNoPcmConstraintFlag; - bool 
m_bNoRefWraparoundConstraintFlag; - bool m_bNoTemporalMvpConstraintFlag; - bool m_bNoSbtmvpConstraintFlag; - bool m_bNoAmvrConstraintFlag; - bool m_bNoBdofConstraintFlag; - bool m_bNoCclmConstraintFlag; - bool m_bNoMtsConstraintFlag; - bool m_bNoAffineMotionConstraintFlag; - bool m_bNoGbiConstraintFlag; - bool m_bNoMhIntraConstraintFlag; - bool m_bNoTriangleConstraintFlag; - bool m_bNoLadfConstraintFlag; - bool m_bNoCurrPicRefConstraintFlag; - bool m_bNoQpDeltaConstraintFlag; - bool m_bNoDepQuantConstraintFlag; - bool m_bNoSignDataHidingConstraintFlag; -#endif + int m_decodingParameterSetId; + int m_VPSId; bool m_affineAmvrEnabledFlag; bool m_DMVR; + bool m_MMVD; bool m_SBT; - uint8_t m_MaxSbtSize; -#if HEVC_VPS - int m_VPSId; -#endif + bool m_ISP; ChromaFormat m_chromaFormatIdc; + bool m_separateColourPlaneFlag; //!< separate colour plane flag uint32_t m_uiMaxTLayers; // maximum number of temporal layers // Structure - uint32_t m_picWidthInLumaSamples; - uint32_t m_picHeightInLumaSamples; + uint32_t m_maxWidthInLumaSamples; + uint32_t m_maxHeightInLumaSamples; + bool m_subPicPresentFlag; // indicates the presence of sub-pictures + uint8_t m_numSubPics; //!< number of sub-pictures used + uint32_t m_subPicCtuTopLeftX[MAX_NUM_SUB_PICS]; + uint32_t m_subPicCtuTopLeftY[MAX_NUM_SUB_PICS]; + uint32_t m_SubPicWidth[MAX_NUM_SUB_PICS]; + uint32_t m_SubPicHeight[MAX_NUM_SUB_PICS]; + bool m_subPicTreatedAsPicFlag[MAX_NUM_SUB_PICS]; + bool m_loopFilterAcrossSubpicEnabledFlag[MAX_NUM_SUB_PICS]; + bool m_subPicIdPresentFlag; //!< indicates the presence of sub-picture IDs + bool m_subPicIdSignallingPresentFlag; //!< indicates the presence of sub-picture ID signalling in the SPS + uint32_t m_subPicIdLen; //!< sub-picture ID length in bits + uint8_t m_subPicId[MAX_NUM_SUB_PICS]; //!< sub-picture ID for each sub-picture in the sequence int m_log2MinCodingBlockSize; int m_log2DiffMaxMinCodingBlockSize; unsigned m_CTUSize; unsigned m_partitionOverrideEnalbed; // enable partition 
constraints override function unsigned m_minQT[3]; // 0: I slice luma; 1: P/B slice; 2: I slice chroma - unsigned m_maxBTDepth[3]; + unsigned m_maxMTTHierarchyDepth[3]; unsigned m_maxBTSize[3]; unsigned m_maxTTSize[3]; + bool m_idrRefParamList; unsigned m_dualITree; uint32_t m_uiMaxCUWidth; uint32_t m_uiMaxCUHeight; uint32_t m_uiMaxCodingDepth; ///< Total CU depth, relative to the smallest possible transform block size. - Window m_conformanceWindow; - RPSList m_RPSList; + RPLList m_RPLList0; + RPLList m_RPLList1; + uint32_t m_numRPL0; + uint32_t m_numRPL1; + bool m_rpl1CopyFromRpl0Flag; + bool m_rpl1IdxPresentFlag; + bool m_allRplEntriesHasSameSignFlag; bool m_bLongTermRefsPresent; bool m_SPSTemporalMVPEnabledFlag; int m_numReorderPics[MAX_TLAYER]; // Tool list - bool m_pcmEnabledFlag; - uint32_t m_pcmLog2MaxSize; - uint32_t m_uiPCMLog2MinSize; + bool m_transformSkipEnabledFlag; + int m_BDPCMEnabled; + bool m_JointCbCrEnabledFlag; // Parameter BitDepths m_bitDepths; int m_qpBDOffset[MAX_NUM_CHANNEL_TYPE]; - int m_pcmBitDepths[MAX_NUM_CHANNEL_TYPE]; - bool m_bPCMFilterDisableFlag; + int m_minQpMinus4[MAX_NUM_CHANNEL_TYPE]; // QP_internal - QP_input; bool m_sbtmvpEnabledFlag; bool m_bdofEnabledFlag; - bool m_disFracMmvdEnabledFlag; + bool m_fpelMmvdEnabledFlag; + bool m_BdofControlPresentFlag; + bool m_DmvrControlPresentFlag; + bool m_ProfControlPresentFlag; uint32_t m_uiBitsForPOC; uint32_t m_numLongTermRefPicSPS; uint32_t m_ltRefPicPocLsbSps[MAX_NUM_LONG_TERM_REF_PICS]; bool m_usedByCurrPicLtSPSFlag[MAX_NUM_LONG_TERM_REF_PICS]; -#if MAX_TB_SIZE_SIGNALLING uint32_t m_log2MaxTbSize; -#endif + bool m_useWeightPred; //!< Use of Weighting Prediction (P_SLICE) + bool m_useWeightedBiPred; //!< Use of Weighting Bi-Prediction (B_SLICE) bool m_saoEnabledFlag; bool m_bTemporalIdNestingFlag; // temporal_id_nesting_flag -#if HEVC_USE_SCALING_LISTS bool m_scalingListEnabledFlag; - bool m_scalingListPresentFlag; - ScalingList m_scalingList; -#endif + bool 
m_loopFilterAcrossVirtualBoundariesDisabledFlag; //!< disable loop filtering across virtual boundaries + unsigned m_numVerVirtualBoundaries; //!< number of vertical virtual boundaries + unsigned m_numHorVirtualBoundaries; //!< number of horizontal virtual boundaries + unsigned m_virtualBoundariesPosX[3]; //!< horizontal position of each vertical virtual boundary + unsigned m_virtualBoundariesPosY[3]; //!< vertical position of each horizontal virtual boundary uint32_t m_uiMaxDecPicBuffering[MAX_TLAYER]; uint32_t m_uiMaxLatencyIncreasePlus1[MAX_TLAYER]; -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - bool m_useStrongIntraSmoothing; -#endif + + TimingInfo m_timingInfo; + bool m_hrdParametersPresentFlag; + HRDParameters m_hrdParameters; bool m_vuiParametersPresentFlag; VUI m_vuiParameters; @@ -1096,29 +1063,31 @@ private: static const int m_winUnitX[NUM_CHROMA_FORMAT]; static const int m_winUnitY[NUM_CHROMA_FORMAT]; -#if !JVET_M0101_HLS - PTL m_pcPTL; -#else ProfileTierLevel m_profileTierLevel; -#endif bool m_alfEnabledFlag; bool m_wrapAroundEnabledFlag; unsigned m_wrapAroundOffset; unsigned m_IBCFlag; + bool m_useColorTrans; + unsigned m_PLTMode; - bool m_lumaReshapeEnable; + bool m_lmcsEnabled; bool m_AMVREnabledFlag; bool m_LMChroma; - bool m_cclmCollocatedChromaFlag; + bool m_horCollocatedChromaFlag; + bool m_verCollocatedChromaFlag; bool m_MTS; bool m_IntraMTS; // 18 bool m_InterMTS; // 19 + bool m_LFNST; + bool m_SMVD; bool m_Affine; bool m_AffineType; - bool m_GBi; // - bool m_MHIntra; + bool m_PROF; + bool m_bcw; // + bool m_ciip; bool m_Triangle; #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET bool m_LadfEnabled; @@ -1126,84 +1095,66 @@ private: int m_LadfQpOffset[MAX_LADF_INTERVALS]; int m_LadfIntervalLowerBound[MAX_LADF_INTERVALS]; #endif + bool m_MRL; + bool m_MIP; + ChromaQpMappingTable m_chromaQpMappingTable; + bool m_GDREnabledFlag; + bool m_SubLayerCbpParametersPresentFlag; + bool m_rprEnabledFlag; + bool m_interLayerPresentFlag; 
public: SPS(); virtual ~SPS(); -#if HEVC_VPS - int getVPSId() const { return m_VPSId; } - void setVPSId(int i) { m_VPSId = i; } -#endif -#if !JVET_M0101_HLS - bool getIntraOnlyConstraintFlag() const { return m_bIntraOnlyConstraintFlag; } - void setIntraOnlyConstraintFlag(bool bVal) { m_bIntraOnlyConstraintFlag = bVal; } - uint32_t getMaxBitDepthConstraintIdc() const { return m_maxBitDepthConstraintIdc; } - void setMaxBitDepthConstraintIdc(uint32_t u) { m_maxBitDepthConstraintIdc = u; } - uint32_t getMaxChromaFormatConstraintIdc() const { return m_maxChromaFormatConstraintIdc; } - void setMaxChromaFormatConstraintIdc(uint32_t u) { m_maxChromaFormatConstraintIdc = u; } - bool getFrameConstraintFlag() const { return m_bFrameConstraintFlag; } - void setFrameConstraintFlag(bool bVal) { m_bFrameConstraintFlag = bVal; } - bool getNoQtbttDualTreeIntraConstraintFlag() const { return m_bNoQtbttDualTreeIntraConstraintFlag; } - void setNoQtbttDualTreeIntraConstraintFlag(bool bVal) { m_bNoQtbttDualTreeIntraConstraintFlag = bVal; } - bool getNoSaoConstraintFlag() const { return m_bNoSaoConstraintFlag; } - void setNoSaoConstraintFlag(bool bVal) { m_bNoSaoConstraintFlag = bVal; } - bool getNoAlfConstraintFlag() const { return m_bNoAlfConstraintFlag; } - void setNoAlfConstraintFlag(bool bVal) { m_bNoAlfConstraintFlag = bVal; } - bool getNoPcmConstraintFlag() const { return m_bNoPcmConstraintFlag; } - void setNoPcmConstraintFlag(bool bVal) { m_bNoPcmConstraintFlag = bVal; } - bool getNoRefWraparoundConstraintFlag() const { return m_bNoRefWraparoundConstraintFlag; } - void setNoRefWraparoundConstraintFlag(bool bVal) { m_bNoRefWraparoundConstraintFlag= bVal; } - bool getNoTemporalMvpConstraintFlag() const { return m_bNoTemporalMvpConstraintFlag; } - void setNoTemporalMvpConstraintFlag(bool bVal) { m_bNoTemporalMvpConstraintFlag = bVal; } - bool getNoSbtmvpConstraintFlag() const { return m_bNoSbtmvpConstraintFlag; } - void setNoSbtmvpConstraintFlag(bool bVal) { 
m_bNoSbtmvpConstraintFlag = bVal; } - bool getNoAmvrConstraintFlag() const { return m_bNoAmvrConstraintFlag; } - void setNoAmvrConstraintFlag(bool bVal) { m_bNoAmvrConstraintFlag = bVal; } - bool getNoBdofConstraintFlag() const { return m_bNoBdofConstraintFlag; } - void setNoBdofConstraintFlag(bool bVal) { m_bNoBdofConstraintFlag = bVal; } - bool getNoCclmConstraintFlag() const { return m_bNoCclmConstraintFlag; } - void setNoCclmConstraintFlag(bool bVal) { m_bNoCclmConstraintFlag = bVal; } - bool getNoMtsConstraintFlag() const { return m_bNoMtsConstraintFlag; } - void setNoMtsConstraintFlag(bool bVal) { m_bNoMtsConstraintFlag = bVal; } - bool getNoAffineMotionConstraintFlag() const { return m_bNoAffineMotionConstraintFlag; } - void setNoAffineMotionConstraintFlag(bool bVal) { m_bNoAffineMotionConstraintFlag = bVal; } - bool getNoGbiConstraintFlag() const { return m_bNoGbiConstraintFlag; } - void setNoGbiConstraintFlag(bool bVal) { m_bNoGbiConstraintFlag = bVal; } - bool getNoMhIntraConstraintFlag() const { return m_bNoMhIntraConstraintFlag; } - void setNoMhIntraConstraintFlag(bool bVal) { m_bNoMhIntraConstraintFlag = bVal; } - bool getNoTriangleConstraintFlag() const { return m_bNoTriangleConstraintFlag; } - void setNoTriangleConstraintFlag(bool bVal) { m_bNoTriangleConstraintFlag = bVal; } - bool getNoLadfConstraintFlag() const { return m_bNoLadfConstraintFlag; } - void setNoLadfConstraintFlag(bool bVal) { m_bNoLadfConstraintFlag = bVal; } - bool getNoCurrPicRefConstraintFlag() const { return m_bNoCurrPicRefConstraintFlag; } - void setNoCurrPicRefConstraintFlag(bool bVal) { m_bNoCurrPicRefConstraintFlag = bVal; } - bool getNoQpDeltaConstraintFlag() const { return m_bNoQpDeltaConstraintFlag; } - void setNoQpDeltaConstraintFlag(bool bVal) { m_bNoQpDeltaConstraintFlag = bVal; } - bool getNoDepQuantConstraintFlag() const { return m_bNoDepQuantConstraintFlag; } - void setNoDepQuantConstraintFlag(bool bVal) { m_bNoDepQuantConstraintFlag = bVal; } - bool 
getNoSignDataHidingConstraintFlag() const { return m_bNoSignDataHidingConstraintFlag; } - void setNoSignDataHidingConstraintFlag(bool bVal) { m_bNoSignDataHidingConstraintFlag = bVal; } -#endif int getSPSId() const { return m_SPSId; } void setSPSId(int i) { m_SPSId = i; } + void setDecodingParameterSetId(int val) { m_decodingParameterSetId = val; } + int getDecodingParameterSetId() const { return m_decodingParameterSetId; } + int getVPSId() const { return m_VPSId; } + void setVPSId(int i) { m_VPSId = i; } + ChromaFormat getChromaFormatIdc () const { return m_chromaFormatIdc; } void setChromaFormatIdc (ChromaFormat i) { m_chromaFormatIdc = i; } + void setSeparateColourPlaneFlag ( bool b ) { m_separateColourPlaneFlag = b; } + bool getSeparateColourPlaneFlag () const { return m_separateColourPlaneFlag; } static int getWinUnitX (int chromaFormatIdc) { CHECK(chromaFormatIdc < 0 || chromaFormatIdc >= NUM_CHROMA_FORMAT, "Invalid chroma format parameter"); return m_winUnitX[chromaFormatIdc]; } static int getWinUnitY (int chromaFormatIdc) { CHECK(chromaFormatIdc < 0 || chromaFormatIdc >= NUM_CHROMA_FORMAT, "Invalid chroma format parameter"); return m_winUnitY[chromaFormatIdc]; } // structure - void setPicWidthInLumaSamples( uint32_t u ) { m_picWidthInLumaSamples = u; } - uint32_t getPicWidthInLumaSamples() const { return m_picWidthInLumaSamples; } - void setPicHeightInLumaSamples( uint32_t u ) { m_picHeightInLumaSamples = u; } - uint32_t getPicHeightInLumaSamples() const { return m_picHeightInLumaSamples; } - - Window& getConformanceWindow() { return m_conformanceWindow; } - const Window& getConformanceWindow() const { return m_conformanceWindow; } - void setConformanceWindow(Window& conformanceWindow ) { m_conformanceWindow = conformanceWindow; } + void setMaxPicWidthInLumaSamples( uint32_t u ) { m_maxWidthInLumaSamples = u; } + uint32_t getMaxPicWidthInLumaSamples() const { return m_maxWidthInLumaSamples; } + void setMaxPicHeightInLumaSamples( uint32_t u ) { 
m_maxHeightInLumaSamples = u; } + uint32_t getMaxPicHeightInLumaSamples() const { return m_maxHeightInLumaSamples; } + + void setSubPicPresentFlag(bool b) { m_subPicPresentFlag = b; } + bool getSubPicPresentFlag() const { return m_subPicPresentFlag; } + + void setNumSubPics( uint8_t u ) { m_numSubPics = u; } + uint8_t getNumSubPics( ) const { return m_numSubPics; } + void setSubPicCtuTopLeftX( int i, uint32_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicCtuTopLeftX[i] = u; } + uint32_t getSubPicCtuTopLeftX( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_subPicCtuTopLeftX[i]; } + void setSubPicCtuTopLeftY( int i, uint32_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicCtuTopLeftY[i] = u; } + uint32_t getSubPicCtuTopLeftY( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_subPicCtuTopLeftY[i]; } + void setSubPicWidth( int i, uint32_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_SubPicWidth[i] = u; } + uint32_t getSubPicWidth( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_SubPicWidth[i]; } + void setSubPicHeight( int i, uint32_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_SubPicHeight[i] = u; } + uint32_t getSubPicHeight( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_SubPicHeight[i]; } + void setSubPicTreatedAsPicFlag( int i, bool u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicTreatedAsPicFlag[i] = u; } + bool getSubPicTreatedAsPicFlag( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_subPicTreatedAsPicFlag[i]; } + void setLoopFilterAcrossSubpicEnabledFlag( int i, bool u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index 
exceeds valid range" ); m_loopFilterAcrossSubpicEnabledFlag[i] = u; } + bool getLoopFilterAcrossSubpicEnabledFlag( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_loopFilterAcrossSubpicEnabledFlag[i]; } + void setSubPicIdPresentFlag( bool b ) { m_subPicIdPresentFlag = b; } + bool getSubPicIdPresentFlag() const { return m_subPicIdPresentFlag; } + void setSubPicIdSignallingPresentFlag( bool b ) { m_subPicIdSignallingPresentFlag = b; } + bool getSubPicIdSignallingPresentFlag() const { return m_subPicIdSignallingPresentFlag; } + void setSubPicIdLen( uint32_t u ) { m_subPicIdLen = u; } + uint32_t getSubPicIdLen() const { return m_subPicIdLen; } + void setSubPicId( int i, uint8_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicId[i] = u; } + uint8_t getSubPicId( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_subPicId[i]; } uint32_t getNumLongTermRefPicSPS() const { return m_numLongTermRefPicSPS; } void setNumLongTermRefPicSPS(uint32_t val) { m_numLongTermRefPicSPS = val; } @@ -1226,13 +1177,13 @@ public: unsigned getMinQTSize(SliceType slicetype, ChannelType chType = CHANNEL_TYPE_LUMA) const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? 
m_minQT[0] : m_minQT[2]) : m_minQT[1]; } - void setMaxBTDepth(unsigned maxBTDepth, - unsigned maxBTDepthI, - unsigned maxBTDepthIChroma) - { m_maxBTDepth[1] = maxBTDepth; m_maxBTDepth[0] = maxBTDepthI; m_maxBTDepth[2] = maxBTDepthIChroma; } - unsigned getMaxBTDepth() const { return m_maxBTDepth[1]; } - unsigned getMaxBTDepthI() const { return m_maxBTDepth[0]; } - unsigned getMaxBTDepthIChroma() const { return m_maxBTDepth[2]; } + void setMaxMTTHierarchyDepth(unsigned maxMTTHierarchyDepth, + unsigned maxMTTHierarchyDepthI, + unsigned maxMTTHierarchyDepthIChroma) + { m_maxMTTHierarchyDepth[1] = maxMTTHierarchyDepth; m_maxMTTHierarchyDepth[0] = maxMTTHierarchyDepthI; m_maxMTTHierarchyDepth[2] = maxMTTHierarchyDepthIChroma; } + unsigned getMaxMTTHierarchyDepth() const { return m_maxMTTHierarchyDepth[1]; } + unsigned getMaxMTTHierarchyDepthI() const { return m_maxMTTHierarchyDepth[0]; } + unsigned getMaxMTTHierarchyDepthIChroma() const { return m_maxMTTHierarchyDepth[2]; } void setMaxBTSize(unsigned maxBTSize, unsigned maxBTSizeI, unsigned maxBTSizeC) @@ -1247,7 +1198,12 @@ public: unsigned getMaxTTSize() const { return m_maxTTSize[1]; } unsigned getMaxTTSizeI() const { return m_maxTTSize[0]; } unsigned getMaxTTSizeIChroma() const { return m_maxTTSize[2]; } - + unsigned* getMinQTSizes() const { return (unsigned *)m_minQT; } + unsigned* getMaxMTTHierarchyDepths() const { return (unsigned *)m_maxMTTHierarchyDepth; } + unsigned* getMaxBTSizes() const { return (unsigned *)m_maxBTSize; } + unsigned* getMaxTTSizes() const { return (unsigned *)m_maxTTSize; } + void setIDRRefParamListPresent(bool b) { m_idrRefParamList = b; } + bool getIDRRefParamListPresent() const { return m_idrRefParamList; } void setUseDualITree(bool b) { m_dualITree = b; } bool getUseDualITree() const { return m_dualITree; } @@ -1257,28 +1213,37 @@ public: uint32_t getMaxCUHeight() const { return m_uiMaxCUHeight; } void setMaxCodingDepth( uint32_t u ) { m_uiMaxCodingDepth = u; } uint32_t 
getMaxCodingDepth() const { return m_uiMaxCodingDepth; } - void setPCMEnabledFlag( bool b ) { m_pcmEnabledFlag = b; } - bool getPCMEnabledFlag() const { return m_pcmEnabledFlag; } - void setPCMLog2MaxSize( uint32_t u ) { m_pcmLog2MaxSize = u; } - uint32_t getPCMLog2MaxSize() const { return m_pcmLog2MaxSize; } - void setPCMLog2MinSize( uint32_t u ) { m_uiPCMLog2MinSize = u; } - uint32_t getPCMLog2MinSize() const { return m_uiPCMLog2MinSize; } + bool getTransformSkipEnabledFlag() const { return m_transformSkipEnabledFlag; } + void setTransformSkipEnabledFlag( bool b ) { m_transformSkipEnabledFlag = b; } + int getBDPCMEnabled() const { return m_BDPCMEnabled; } + void setBDPCMEnabled(int val) { m_BDPCMEnabled = val; } void setBitsForPOC( uint32_t u ) { m_uiBitsForPOC = u; } uint32_t getBitsForPOC() const { return m_uiBitsForPOC; } void setNumReorderPics(int i, uint32_t tlayer) { m_numReorderPics[tlayer] = i; } int getNumReorderPics(uint32_t tlayer) const { return m_numReorderPics[tlayer]; } - void createRPSList( int numRPS ); - const RPSList* getRPSList() const { return &m_RPSList; } - RPSList* getRPSList() { return &m_RPSList; } + void createRPLList0(int numRPL); + void createRPLList1(int numRPL); + const RPLList* getRPLList( bool b ) const { return b==1 ? &m_RPLList1 : &m_RPLList0; } + RPLList* getRPLList( bool b ) { return b==1 ? &m_RPLList1 : &m_RPLList0; } + uint32_t getNumRPL( bool b ) const { return b==1 ? 
m_numRPL1 : m_numRPL0; } + const RPLList* getRPLList0() const { return &m_RPLList0; } + RPLList* getRPLList0() { return &m_RPLList0; } + const RPLList* getRPLList1() const { return &m_RPLList1; } + RPLList* getRPLList1() { return &m_RPLList1; } + uint32_t getNumRPL0() const { return m_numRPL0; } + uint32_t getNumRPL1() const { return m_numRPL1; } + void setRPL1CopyFromRPL0Flag(bool isCopy) { m_rpl1CopyFromRpl0Flag = isCopy; } + bool getRPL1CopyFromRPL0Flag() const { return m_rpl1CopyFromRpl0Flag; } + bool getRPL1IdxPresentFlag() const { return m_rpl1IdxPresentFlag; } + void setAllActiveRplEntriesHasSameSignFlag(bool isAllSame) { m_allRplEntriesHasSameSignFlag = isAllSame; } + bool getAllActiveRplEntriesHasSameSignFlag() const { return m_allRplEntriesHasSameSignFlag; } bool getLongTermRefsPresent() const { return m_bLongTermRefsPresent; } void setLongTermRefsPresent(bool b) { m_bLongTermRefsPresent=b; } bool getSPSTemporalMVPEnabledFlag() const { return m_SPSTemporalMVPEnabledFlag; } void setSPSTemporalMVPEnabledFlag(bool b) { m_SPSTemporalMVPEnabledFlag=b; } -#if MAX_TB_SIZE_SIGNALLING void setLog2MaxTbSize( uint32_t u ) { m_log2MaxTbSize = u; } uint32_t getLog2MaxTbSize() const { return m_log2MaxTbSize; } uint32_t getMaxTbSize() const { return 1 << m_log2MaxTbSize; } -#endif // Bit-depth int getBitDepth(ChannelType type) const { return m_bitDepths.recon[type]; } void setBitDepth(ChannelType type, int u ) { m_bitDepths.recon[type] = u; } @@ -1288,12 +1253,16 @@ public: int getDifferentialLumaChromaBitDepth() const { return int(m_bitDepths.recon[CHANNEL_TYPE_LUMA]) - int(m_bitDepths.recon[CHANNEL_TYPE_CHROMA]); } int getQpBDOffset(ChannelType type) const { return m_qpBDOffset[type]; } void setQpBDOffset(ChannelType type, int i) { m_qpBDOffset[type] = i; } + int getMinQpPrimeTsMinus4(ChannelType type) const { return m_minQpMinus4[type]; } + void setMinQpPrimeTsMinus4(ChannelType type, int i) { m_minQpMinus4[type] = i; } void setSAOEnabledFlag(bool bVal) { 
m_saoEnabledFlag = bVal; } bool getSAOEnabledFlag() const { return m_saoEnabledFlag; } bool getALFEnabledFlag() const { return m_alfEnabledFlag; } void setALFEnabledFlag( bool b ) { m_alfEnabledFlag = b; } + void setJointCbCrEnabledFlag(bool bVal) { m_JointCbCrEnabledFlag = bVal; } + bool getJointCbCrEnabledFlag() const { return m_JointCbCrEnabledFlag; } bool getSBTMVPEnabledFlag() const { return m_sbtmvpEnabledFlag; } void setSBTMVPEnabledFlag(bool b) { m_sbtmvpEnabledFlag = b; } @@ -1301,51 +1270,57 @@ public: void setBDOFEnabledFlag(bool b) { m_bdofEnabledFlag = b; } bool getBDOFEnabledFlag() const { return m_bdofEnabledFlag; } - bool getDisFracMmvdEnabledFlag() const { return m_disFracMmvdEnabledFlag; } - void setDisFracMmvdEnabledFlag( bool b ) { m_disFracMmvdEnabledFlag = b; } + bool getFpelMmvdEnabledFlag() const { return m_fpelMmvdEnabledFlag; } + void setFpelMmvdEnabledFlag( bool b ) { m_fpelMmvdEnabledFlag = b; } bool getUseDMVR()const { return m_DMVR; } void setUseDMVR(bool b) { m_DMVR = b; } + bool getUseMMVD()const { return m_MMVD; } + void setUseMMVD(bool b) { m_MMVD = b; } + bool getBdofControlPresentFlag()const { return m_BdofControlPresentFlag; } + void setBdofControlPresentFlag(bool b) { m_BdofControlPresentFlag = b; } + + bool getDmvrControlPresentFlag()const { return m_DmvrControlPresentFlag; } + void setDmvrControlPresentFlag(bool b) { m_DmvrControlPresentFlag = b; } + + bool getProfControlPresentFlag()const { return m_ProfControlPresentFlag; } + void setProfControlPresentFlag(bool b) { m_ProfControlPresentFlag = b; } uint32_t getMaxTLayers() const { return m_uiMaxTLayers; } void setMaxTLayers( uint32_t uiMaxTLayers ) { CHECK( uiMaxTLayers > MAX_TLAYER, "Invalid number T-layers" ); m_uiMaxTLayers = uiMaxTLayers; } bool getTemporalIdNestingFlag() const { return m_bTemporalIdNestingFlag; } void setTemporalIdNestingFlag( bool bValue ) { m_bTemporalIdNestingFlag = bValue; } - uint32_t getPCMBitDepth(ChannelType type) const { return 
m_pcmBitDepths[type]; } - void setPCMBitDepth(ChannelType type, uint32_t u) { m_pcmBitDepths[type] = u; } - void setPCMFilterDisableFlag( bool bValue ) { m_bPCMFilterDisableFlag = bValue; } - bool getPCMFilterDisableFlag() const { return m_bPCMFilterDisableFlag; } -#if HEVC_USE_SCALING_LISTS bool getScalingListFlag() const { return m_scalingListEnabledFlag; } void setScalingListFlag( bool b ) { m_scalingListEnabledFlag = b; } - bool getScalingListPresentFlag() const { return m_scalingListPresentFlag; } - void setScalingListPresentFlag( bool b ) { m_scalingListPresentFlag = b; } - ScalingList& getScalingList() { return m_scalingList; } - const ScalingList& getScalingList() const { return m_scalingList; } -#endif + void setLoopFilterAcrossVirtualBoundariesDisabledFlag(bool b) { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b; } + bool getLoopFilterAcrossVirtualBoundariesDisabledFlag() const { return m_loopFilterAcrossVirtualBoundariesDisabledFlag; } + void setNumVerVirtualBoundaries(unsigned u) { m_numVerVirtualBoundaries = u; } + unsigned getNumVerVirtualBoundaries() const { return m_numVerVirtualBoundaries; } + void setNumHorVirtualBoundaries(unsigned u) { m_numHorVirtualBoundaries = u; } + unsigned getNumHorVirtualBoundaries() const { return m_numHorVirtualBoundaries; } + void setVirtualBoundariesPosX(unsigned u, unsigned idx) { CHECK( idx >= 3, "vitrual boundary index exceeds valid range" ); m_virtualBoundariesPosX[idx] = u; } + unsigned getVirtualBoundariesPosX(unsigned idx) const { CHECK( idx >= 3, "vitrual boundary index exceeds valid range" ); return m_virtualBoundariesPosX[idx]; } + void setVirtualBoundariesPosY(unsigned u, unsigned idx) { CHECK( idx >= 3, "vitrual boundary index exceeds valid range" ); m_virtualBoundariesPosY[idx] = u; } + unsigned getVirtualBoundariesPosY(unsigned idx) const { CHECK( idx >= 3, "vitrual boundary index exceeds valid range" ); return m_virtualBoundariesPosY[idx]; } uint32_t getMaxDecPicBuffering(uint32_t tlayer) const { 
return m_uiMaxDecPicBuffering[tlayer]; } void setMaxDecPicBuffering( uint32_t ui, uint32_t tlayer ) { CHECK(tlayer >= MAX_TLAYER, "Invalid T-layer"); m_uiMaxDecPicBuffering[tlayer] = ui; } uint32_t getMaxLatencyIncreasePlus1(uint32_t tlayer) const { return m_uiMaxLatencyIncreasePlus1[tlayer]; } void setMaxLatencyIncreasePlus1( uint32_t ui , uint32_t tlayer) { m_uiMaxLatencyIncreasePlus1[tlayer] = ui; } -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - void setUseStrongIntraSmoothing(bool bVal) { m_useStrongIntraSmoothing = bVal; } - bool getUseStrongIntraSmoothing() const { return m_useStrongIntraSmoothing; } - -#endif void setAffineAmvrEnabledFlag( bool val ) { m_affineAmvrEnabledFlag = val; } bool getAffineAmvrEnabledFlag() const { return m_affineAmvrEnabledFlag; } + TimingInfo* getTimingInfo() { return &m_timingInfo; } + const TimingInfo* getTimingInfo() const { return &m_timingInfo; } + bool getHrdParametersPresentFlag() const { return m_hrdParametersPresentFlag; } + void setHrdParametersPresentFlag(bool b) { m_hrdParametersPresentFlag = b; } + HRDParameters* getHrdParameters() { return &m_hrdParameters; } + const HRDParameters* getHrdParameters() const { return &m_hrdParameters; } bool getVuiParametersPresentFlag() const { return m_vuiParametersPresentFlag; } void setVuiParametersPresentFlag(bool b) { m_vuiParametersPresentFlag = b; } VUI* getVuiParameters() { return &m_vuiParameters; } const VUI* getVuiParameters() const { return &m_vuiParameters; } -#if !JVET_M0101_HLS - const PTL* getPTL() const { return &m_pcPTL; } - PTL* getPTL() { return &m_pcPTL; } -#else const ProfileTierLevel* getProfileTierLevel() const { return &m_profileTierLevel; } ProfileTierLevel* getProfileTierLevel() { return &m_profileTierLevel; } -#endif const SPSRExt& getSpsRangeExtension() const { return m_spsRangeExtension; } SPSRExt& getSpsRangeExtension() { return m_spsRangeExtension; } @@ -1354,14 +1329,18 @@ public: bool getWrapAroundEnabledFlag() const { return 
m_wrapAroundEnabledFlag; } void setWrapAroundOffset(unsigned offset) { m_wrapAroundOffset = offset; } unsigned getWrapAroundOffset() const { return m_wrapAroundOffset; } - void setUseReshaper(bool b) { m_lumaReshapeEnable = b; } - bool getUseReshaper() const { return m_lumaReshapeEnable; } + void setUseLmcs(bool b) { m_lmcsEnabled = b; } + bool getUseLmcs() const { return m_lmcsEnabled; } void setIBCFlag(unsigned IBCFlag) { m_IBCFlag = IBCFlag; } unsigned getIBCFlag() const { return m_IBCFlag; } + void setUseColorTrans(bool value) { m_useColorTrans = value; } + bool getUseColorTrans() const { return m_useColorTrans; } + void setPLTMode(unsigned PLTMode) { m_PLTMode = PLTMode; } + unsigned getPLTMode() const { return m_PLTMode; } void setUseSBT( bool b ) { m_SBT = b; } bool getUseSBT() const { return m_SBT; } - void setMaxSbtSize( uint8_t val ) { m_MaxSbtSize = val; } - uint8_t getMaxSbtSize() const { return m_MaxSbtSize; } + void setUseISP( bool b ) { m_ISP = b; } + bool getUseISP() const { return m_ISP; } void setAMVREnabledFlag ( bool b ) { m_AMVREnabledFlag = b; } bool getAMVREnabledFlag () const { return m_AMVREnabledFlag; } @@ -1369,19 +1348,28 @@ public: bool getUseAffine () const { return m_Affine; } void setUseAffineType ( bool b ) { m_AffineType = b; } bool getUseAffineType () const { return m_AffineType; } + void setUsePROF ( bool b ) { m_PROF = b; } + bool getUsePROF () const { return m_PROF; } void setUseLMChroma ( bool b ) { m_LMChroma = b; } bool getUseLMChroma () const { return m_LMChroma; } - void setCclmCollocatedChromaFlag( bool b ) { m_cclmCollocatedChromaFlag = b; } - bool getCclmCollocatedChromaFlag() const { return m_cclmCollocatedChromaFlag; } + void setHorCollocatedChromaFlag( bool b ) { m_horCollocatedChromaFlag = b; } + bool getHorCollocatedChromaFlag() const { return m_horCollocatedChromaFlag; } + void setVerCollocatedChromaFlag( bool b ) { m_verCollocatedChromaFlag = b; } + bool getVerCollocatedChromaFlag() const { return 
m_verCollocatedChromaFlag; } + bool getCclmCollocatedChromaFlag() const { return m_verCollocatedChromaFlag; } void setUseMTS ( bool b ) { m_MTS = b; } bool getUseMTS () const { return m_MTS; } - bool getUseImplicitMTS () const { return m_MTS && !m_IntraMTS && !m_InterMTS; } + bool getUseImplicitMTS () const { return m_MTS && !m_IntraMTS; } void setUseIntraMTS ( bool b ) { m_IntraMTS = b; } bool getUseIntraMTS () const { return m_IntraMTS; } void setUseInterMTS ( bool b ) { m_InterMTS = b; } bool getUseInterMTS () const { return m_InterMTS; } - void setUseGBi ( bool b ) { m_GBi = b; } - bool getUseGBi () const { return m_GBi; } + void setUseLFNST ( bool b ) { m_LFNST = b; } + bool getUseLFNST () const { return m_LFNST; } + void setUseSMVD(bool b) { m_SMVD = b; } + bool getUseSMVD() const { return m_SMVD; } + void setUseBcw ( bool b ) { m_bcw = b; } + bool getUseBcw () const { return m_bcw; } #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET void setLadfEnabled ( bool b ) { m_LadfEnabled = b; } bool getLadfEnabled () const { return m_LadfEnabled; } @@ -1393,51 +1381,45 @@ public: int getLadfIntervalLowerBound( int idx ) const { return m_LadfIntervalLowerBound[ idx ]; } #endif - void setUseMHIntra ( bool b ) { m_MHIntra = b; } - bool getUseMHIntra () const { return m_MHIntra; } + void setUseCiip ( bool b ) { m_ciip = b; } + bool getUseCiip () const { return m_ciip; } void setUseTriangle ( bool b ) { m_Triangle = b; } bool getUseTriangle () const { return m_Triangle; } + void setUseMRL ( bool b ) { m_MRL = b; } + bool getUseMRL () const { return m_MRL; } + void setUseMIP ( bool b ) { m_MIP = b; } + bool getUseMIP () const { return m_MIP; } + + bool getUseWP () const { return m_useWeightPred; } + bool getUseWPBiPred () const { return m_useWeightedBiPred; } + void setUseWP ( bool b ) { m_useWeightPred = b; } + void setUseWPBiPred ( bool b ) { m_useWeightedBiPred = b; } + void setChromaQpMappingTableFromParams(const ChromaQpMappingTableParams ¶ms, const int qpBdOffset) { 
m_chromaQpMappingTable.setParams(params, qpBdOffset); } + void derivedChromaQPMappingTables() { m_chromaQpMappingTable.derivedChromaQPMappingTables(); } + const ChromaQpMappingTable& getChromaQpMappingTable() const { return m_chromaQpMappingTable;} + int getMappedChromaQpValue(ComponentID compID, int qpVal) const { return m_chromaQpMappingTable.getMappedChromaQpValue(compID, qpVal); } + void setGDREnabledFlag(bool flag) { m_GDREnabledFlag = flag; } + bool getGDREnabledFlag() const { return m_GDREnabledFlag; } + void setSubLayerParametersPresentFlag(bool flag) { m_SubLayerCbpParametersPresentFlag = flag; } + bool getSubLayerParametersPresentFlag() const { return m_SubLayerCbpParametersPresentFlag; } + + bool getRprEnabledFlag() const { return m_rprEnabledFlag; } + void setRprEnabledFlag( bool flag ) { m_rprEnabledFlag = flag; } + bool getInterLayerPresentFlag() const { return m_interLayerPresentFlag; } + void setInterLayerPresentFlag( bool b ) { m_interLayerPresentFlag = b; } + }; /// Reference Picture Lists class -class RefPicListModification -{ -private: - bool m_refPicListModificationFlagL0; - bool m_refPicListModificationFlagL1; - uint32_t m_RefPicSetIdxL0[REF_PIC_LIST_NUM_IDX]; - uint32_t m_RefPicSetIdxL1[REF_PIC_LIST_NUM_IDX]; - -public: - RefPicListModification(); - virtual ~RefPicListModification(); - - bool getRefPicListModificationFlagL0() const { return m_refPicListModificationFlagL0; } - void setRefPicListModificationFlagL0(bool flag) { m_refPicListModificationFlagL0 = flag; } - bool getRefPicListModificationFlagL1() const { return m_refPicListModificationFlagL1; } - void setRefPicListModificationFlagL1(bool flag) { m_refPicListModificationFlagL1 = flag; } - uint32_t getRefPicSetIdxL0(uint32_t idx) const { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); return m_RefPicSetIdxL0[idx]; } - void setRefPicSetIdxL0(uint32_t idx, uint32_t refPicSetIdx) { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); m_RefPicSetIdxL0[idx] = 
refPicSetIdx; } - uint32_t getRefPicSetIdxL1(uint32_t idx) const { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); return m_RefPicSetIdxL1[idx]; } - void setRefPicSetIdxL1(uint32_t idx, uint32_t refPicSetIdx) { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); m_RefPicSetIdxL1[idx] = refPicSetIdx; } -}; - - /// PPS RExt class class PPSRExt // Names aligned to text specification { private: - int m_log2MaxTransformSkipBlockSize; bool m_crossComponentPredictionEnabledFlag; - // Chroma QP Adjustments - int m_cuChromaQpOffsetSubdiv; - int m_chromaQpOffsetListLen; // size (excludes the null entry used in the following array). - ChromaQpAdj m_ChromaQpAdjTableIncludingNullEntry[1+MAX_QP_OFFSET_LIST_SIZE]; //!< Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise - uint32_t m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE]; public: @@ -1445,41 +1427,14 @@ public: bool settingsDifferFromDefaults(const bool bTransformSkipEnabledFlag) const { - return (bTransformSkipEnabledFlag && (getLog2MaxTransformSkipBlockSize() !=2)) - || (getCrossComponentPredictionEnabledFlag() ) - || (getChromaQpOffsetListEnabledFlag() ) + return (getCrossComponentPredictionEnabledFlag() ) || (getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA) !=0 ) || (getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA) !=0 ); } - uint32_t getLog2MaxTransformSkipBlockSize() const { return m_log2MaxTransformSkipBlockSize; } - void setLog2MaxTransformSkipBlockSize( uint32_t u ) { m_log2MaxTransformSkipBlockSize = u; } - bool getCrossComponentPredictionEnabledFlag() const { return m_crossComponentPredictionEnabledFlag; } void setCrossComponentPredictionEnabledFlag(bool value) { m_crossComponentPredictionEnabledFlag = value; } - void clearChromaQpOffsetList() { m_chromaQpOffsetListLen = 0; } - - uint32_t getCuChromaQpOffsetSubdiv () const { return m_cuChromaQpOffsetSubdiv; } - void setCuChromaQpOffsetSubdiv ( uint32_t u ) { 
m_cuChromaQpOffsetSubdiv = u; } - - bool getChromaQpOffsetListEnabledFlag() const { return getChromaQpOffsetListLen()>0; } - int getChromaQpOffsetListLen() const { return m_chromaQpOffsetListLen; } - - const ChromaQpAdj& getChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1 ) const - { - CHECK(cuChromaQpOffsetIdxPlus1 >= m_chromaQpOffsetListLen+1, "Invalid chroma QP offset"); - return m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1]; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise - } - - void setChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1, int cbOffset, int crOffset ) - { - CHECK(cuChromaQpOffsetIdxPlus1 == 0 || cuChromaQpOffsetIdxPlus1 > MAX_QP_OFFSET_LIST_SIZE, "Invalid chroma QP offset"); - m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CbOffset = cbOffset; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] 
otherwise - m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CrOffset = crOffset; - m_chromaQpOffsetListLen = std::max(m_chromaQpOffsetListLen, cuChromaQpOffsetIdxPlus1); - } - // Now: getPpsRangeExtension().getLog2SaoOffsetScale and getPpsRangeExtension().setLog2SaoOffsetScale uint32_t getLog2SaoOffsetScale(ChannelType type) const { return m_log2SaoOffsetScale[type]; } void setLog2SaoOffsetScale(ChannelType type, uint32_t uiBitShift) { m_log2SaoOffsetScale[type] = uiBitShift; } @@ -1495,54 +1450,86 @@ private: int m_SPSId; // seq_parameter_set_id int m_picInitQPMinus26; bool m_useDQP; - bool m_bConstrainedIntraPred; // constrained_intra_pred_flag bool m_bSliceChromaQpFlag; // slicelevel_chroma_qp_flag + int m_layerId; + int m_temporalId; + // access channel - uint32_t m_cuQpDeltaSubdiv; // cu_qp_delta_subdiv int m_chromaCbQpOffset; int m_chromaCrQpOffset; + bool m_chromaJointCbCrQpOffsetPresentFlag; + int m_chromaCbCrQpOffset; + + // Chroma QP Adjustments + int m_chromaQpOffsetListLen; // size (excludes the null entry used in the following array). + ChromaQpAdj m_ChromaQpAdjTableIncludingNullEntry[1+MAX_QP_OFFSET_LIST_SIZE]; //!< Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise uint32_t m_numRefIdxL0DefaultActive; uint32_t m_numRefIdxL1DefaultActive; + bool m_rpl1IdxPresentFlag; + bool m_bUseWeightPred; //!< Use of Weighting Prediction (P_SLICE) bool m_useWeightedBiPred; //!< Use of Weighting Bi-Prediction (B_SLICE) bool m_OutputFlagPresentFlag; //!< Indicates the presence of output_flag in slice header - bool m_TransquantBypassEnabledFlag; //!< Indicates presence of cu_transquant_bypass_flag in CUs. 
- bool m_useTransformSkip; -#if HEVC_DEPENDENT_SLICES - bool m_dependentSliceSegmentsEnabledFlag; //!< Indicates the presence of dependent slices -#endif -#if HEVC_TILES_WPP - bool m_tilesEnabledFlag; //!< Indicates the presence of tiles + uint8_t m_numSubPics; //!< number of sub-pictures used - must match SPS + bool m_subPicIdSignallingPresentFlag; //!< indicates the presence of sub-picture ID signalling in the PPS + uint32_t m_subPicIdLen; //!< sub-picture ID length in bits + uint8_t m_subPicId[MAX_NUM_SUB_PICS]; //!< sub-picture ID for each sub-picture in the sequence + bool m_noPicPartitionFlag; //!< no picture partitioning flag - single slice, single tile + uint8_t m_log2CtuSize; //!< log2 of the CTU size - required to match corresponding value in SPS + uint8_t m_ctuSize; //!< CTU size + uint32_t m_picWidthInCtu; //!< picture width in units of CTUs + uint32_t m_picHeightInCtu; //!< picture height in units of CTUs + uint32_t m_numExpTileCols; //!< number of explicitly specified tile columns + uint32_t m_numExpTileRows; //!< number of explicitly specified tile rows + uint32_t m_numTileCols; //!< number of tile columns + uint32_t m_numTileRows; //!< number of tile rows + std::vector<uint32_t> m_tileColWidth; //!< tile column widths in units of CTUs + std::vector<uint32_t> m_tileRowHeight; //!< tile row heights in units of CTUs + std::vector<uint32_t> m_tileColBd; //!< tile column left-boundaries in units of CTUs + std::vector<uint32_t> m_tileRowBd; //!< tile row top-boundaries in units of CTUs + std::vector<uint32_t> m_ctuToTileCol; //!< mapping between CTU horizontal address and tile column index + std::vector<uint32_t> m_ctuToTileRow; //!< mapping between CTU vertical address and tile row index + bool m_rectSliceFlag; //!< rectangular slice flag + bool m_singleSlicePerSubPicFlag; //!< single slice per sub-picture flag + std::vector<uint32_t> m_ctuToSubPicIdx; //!< mapping between CTU and Sub-picture index + uint32_t m_numSlicesInPic; //!< number of rectangular 
slices in the picture (raster-scan slice specified at slice level) + bool m_tileIdxDeltaPresentFlag; //!< tile index delta present flag + std::vector<RectSlice> m_rectSlices; //!< list of rectangular slice signalling parameters + std::vector<SliceMap> m_sliceMap; //!< list of CTU maps for each slice in the picture + bool m_loopFilterAcrossTilesEnabledFlag; //!< loop filtering applied across tiles flag + bool m_loopFilterAcrossSlicesEnabledFlag; //!< loop filtering applied across slices flag + int m_log2MaxTransformSkipBlockSize; bool m_entropyCodingSyncEnabledFlag; //!< Indicates the presence of wavefronts - bool m_loopFilterAcrossTilesEnabledFlag; - bool m_uniformSpacingFlag; - int m_numTileColumnsMinus1; - int m_numTileRowsMinus1; - std::vector<int> m_tileColumnWidth; - std::vector<int> m_tileRowHeight; -#endif + bool m_constantSliceHeaderParamsEnabledFlag; + int m_PPSDepQuantEnabledIdc; + int m_PPSRefPicListSPSIdc0; + int m_PPSRefPicListSPSIdc1; + int m_PPSMvdL1ZeroIdc; + int m_PPSCollocatedFromL0Idc; + uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; + uint32_t m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; bool m_cabacInitPresentFlag; + bool m_pictureHeaderExtensionPresentFlag; //< picture header extension flags present in picture headers or not bool m_sliceHeaderExtensionPresentFlag; - bool m_loopFilterAcrossSlicesEnabledFlag; bool m_deblockingFilterControlPresentFlag; bool m_deblockingFilterOverrideEnabledFlag; bool m_ppsDeblockingFilterDisabledFlag; int m_deblockingFilterBetaOffsetDiv2; //< beta offset for deblocking filter int m_deblockingFilterTcOffsetDiv2; //< tc offset for deblocking filter -#if HEVC_USE_SCALING_LISTS - bool m_scalingListPresentFlag; - ScalingList m_scalingList; //!< ScalingList class -#endif bool m_listsModificationPresentFlag; - uint32_t m_log2ParallelMergeLevelMinus2; - int m_numExtraSliceHeaderBits; + + + uint32_t m_picWidthInLumaSamples; + uint32_t m_picHeightInLumaSamples; + Window m_conformanceWindow; + Window m_scalingWindow; 
PPSRExt m_ppsRangeExtension; @@ -1558,17 +1545,21 @@ public: int getSPSId() const { return m_SPSId; } void setSPSId(int i) { m_SPSId = i; } + void setTemporalId( int i ) { m_temporalId = i; } + int getTemporalId() const { return m_temporalId; } + void setLayerId( int i ) { m_layerId = i; } + int getLayerId() const { return m_layerId; } + int getPicInitQPMinus26() const { return m_picInitQPMinus26; } void setPicInitQPMinus26( int i ) { m_picInitQPMinus26 = i; } bool getUseDQP() const { return m_useDQP; } void setUseDQP( bool b ) { m_useDQP = b; } - bool getConstrainedIntraPred() const { return m_bConstrainedIntraPred; } - void setConstrainedIntraPred( bool b ) { m_bConstrainedIntraPred = b; } bool getSliceChromaQpFlag() const { return m_bSliceChromaQpFlag; } void setSliceChromaQpFlag( bool b ) { m_bSliceChromaQpFlag = b; } - void setCuQpDeltaSubdiv( uint32_t u ) { m_cuQpDeltaSubdiv = u; } - uint32_t getCuQpDeltaSubdiv() const { return m_cuQpDeltaSubdiv; } + + bool getJointCbCrQpOffsetPresentFlag() const { return m_chromaJointCbCrQpOffsetPresentFlag; } + void setJointCbCrQpOffsetPresentFlag(bool b) { m_chromaJointCbCrQpOffsetPresentFlag = b; } void setQpOffset(ComponentID compID, int i ) { @@ -1580,6 +1571,10 @@ public: { m_chromaCrQpOffset = i; } + else if (compID==JOINT_CbCr) + { + m_chromaCbCrQpOffset = i; + } else { THROW( "Invalid chroma QP offset" ); @@ -1587,7 +1582,26 @@ public: } int getQpOffset(ComponentID compID) const { - return (compID==COMPONENT_Y) ? 0 : (compID==COMPONENT_Cb ? m_chromaCbQpOffset : m_chromaCrQpOffset ); + return (compID==COMPONENT_Y) ? 0 : (compID==COMPONENT_Cb ? m_chromaCbQpOffset : compID==COMPONENT_Cr ? 
m_chromaCrQpOffset : m_chromaCbCrQpOffset ); + } + + bool getCuChromaQpOffsetEnabledFlag() const { return getChromaQpOffsetListLen()>0; } + int getChromaQpOffsetListLen() const { return m_chromaQpOffsetListLen; } + void clearChromaQpOffsetList() { m_chromaQpOffsetListLen = 0; } + + const ChromaQpAdj& getChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1 ) const + { + CHECK(cuChromaQpOffsetIdxPlus1 >= m_chromaQpOffsetListLen+1, "Invalid chroma QP offset"); + return m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1]; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise + } + + void setChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1, int cbOffset, int crOffset, int jointCbCrOffset ) + { + CHECK(cuChromaQpOffsetIdxPlus1 == 0 || cuChromaQpOffsetIdxPlus1 > MAX_QP_OFFSET_LIST_SIZE, "Invalid chroma QP offset"); + m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CbOffset = cbOffset; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] 
otherwise + m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CrOffset = crOffset; + m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.JointCbCrOffset = jointCbCrOffset; + m_chromaQpOffsetListLen = std::max(m_chromaQpOffsetListLen, cuChromaQpOffsetIdxPlus1); } void setNumRefIdxL0DefaultActive(uint32_t ui) { m_numRefIdxL0DefaultActive=ui; } @@ -1595,6 +1609,9 @@ public: void setNumRefIdxL1DefaultActive(uint32_t ui) { m_numRefIdxL1DefaultActive=ui; } uint32_t getNumRefIdxL1DefaultActive() const { return m_numRefIdxL1DefaultActive; } + void setRpl1IdxPresentFlag(bool isPresent) { m_rpl1IdxPresentFlag = isPresent; } + uint32_t getRpl1IdxPresentFlag() const { return m_rpl1IdxPresentFlag; } + bool getUseWP() const { return m_bUseWeightPred; } bool getWPBiPred() const { return m_useWeightedBiPred; } void setUseWP( bool b ) { m_bUseWeightPred = b; } @@ -1602,37 +1619,105 @@ public: void setOutputFlagPresentFlag( bool b ) { m_OutputFlagPresentFlag = b; } bool getOutputFlagPresentFlag() const { return m_OutputFlagPresentFlag; } - void setTransquantBypassEnabledFlag( bool b ) { m_TransquantBypassEnabledFlag = b; } - bool getTransquantBypassEnabledFlag() const { return m_TransquantBypassEnabledFlag; } + void setNumSubPics( uint8_t u ) { m_numSubPics = u; } + uint8_t getNumSubPics( ) const { return m_numSubPics; } + void setSubPicIdSignallingPresentFlag( bool b ) { m_subPicIdSignallingPresentFlag = b; } + bool getSubPicIdSignallingPresentFlag() const { return m_subPicIdSignallingPresentFlag; } + void setSubPicIdLen( uint32_t u ) { m_subPicIdLen = u; } + uint32_t getSubPicIdLen() const { return m_subPicIdLen; } + void setSubPicId( int i, uint8_t u ) { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicId[i] = u; } + uint8_t getSubPicId( int i ) const { CHECK( i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return m_subPicId[i]; } + void setNoPicPartitionFlag( bool b ) { m_noPicPartitionFlag = 
b; } + bool getNoPicPartitionFlag( ) const { return m_noPicPartitionFlag; } + void setLog2CtuSize( uint8_t u ) { m_log2CtuSize = u; m_ctuSize = 1 << m_log2CtuSize; + m_picWidthInCtu = (m_picWidthInLumaSamples + m_ctuSize - 1) / m_ctuSize; + m_picHeightInCtu = (m_picHeightInLumaSamples + m_ctuSize - 1) / m_ctuSize; } + uint8_t getLog2CtuSize( ) const { return m_log2CtuSize; } + uint8_t getCtuSize( ) const { return m_ctuSize; } + uint8_t getPicWidthInCtu( ) const { return m_picWidthInCtu; } + uint8_t getPicHeightInCtu( ) const { return m_picHeightInCtu; } + void setNumExpTileColumns( uint32_t u ) { m_numExpTileCols = u; } + uint32_t getNumExpTileColumns( ) const { return m_numExpTileCols; } + void setNumExpTileRows( uint32_t u ) { m_numExpTileRows = u; } + uint32_t getNumExpTileRows( ) const { return m_numExpTileRows; } + void setNumTileColumns( uint32_t u ) { m_numTileCols = u; } + uint32_t getNumTileColumns( ) const { return m_numTileCols; } + void setNumTileRows( uint32_t u ) { m_numTileRows = u; } + uint32_t getNumTileRows( ) const { return m_numTileRows; } + uint32_t getNumTiles( ) const { return m_numTileCols * m_numTileRows; } + void setTileColumnWidths( std::vector<uint32_t> widths ) { m_tileColWidth = widths; } + void setTileRowHeights( std::vector<uint32_t> heights ) { m_tileRowHeight = heights; } + void addTileColumnWidth( uint32_t u ) { CHECK( m_tileColWidth.size() >= MAX_TILE_COLS, "Number of tile columns exceeds valid range" ); m_tileColWidth.push_back(u); } + void addTileRowHeight( uint32_t u ) { CHECK( m_tileRowHeight.size() >= MAX_TILE_ROWS, "Number of tile rows exceeds valid range" ); m_tileRowHeight.push_back(u); } + uint32_t getTileColumnWidth( int idx ) const { CHECK( idx >= m_tileColWidth.size(), "Tile column index exceeds valid range" ); return m_tileColWidth[idx]; } + uint32_t getTileRowHeight( int idx ) const { CHECK( idx >= m_tileRowHeight.size(), "Tile row index exceeds valid range" ); return m_tileRowHeight[idx]; } + uint32_t 
getTileColumnBd( int idx ) const { CHECK( idx >= m_tileColBd.size(), "Tile column index exceeds valid range" ); return m_tileColBd[idx]; } + uint32_t getTileRowBd( int idx ) const { CHECK( idx >= m_tileRowBd.size(), "Tile row index exceeds valid range" ); return m_tileRowBd[idx]; } + uint32_t ctuToTileCol( int ctuX ) const { CHECK( ctuX >= m_ctuToTileCol.size(), "CTU address index exceeds valid range" ); return m_ctuToTileCol[ctuX]; } + uint32_t ctuToTileRow( int ctuY ) const { CHECK( ctuY >= m_ctuToTileRow.size(), "CTU address index exceeds valid range" ); return m_ctuToTileRow[ctuY]; } + uint32_t ctuToTileColBd( int ctuX ) const { return getTileColumnBd(ctuToTileCol( ctuX )); } + uint32_t ctuToTileRowBd( int ctuY ) const { return getTileRowBd(ctuToTileRow( ctuY )); } + bool ctuIsTileColBd( int ctuX ) const { return ctuX == ctuToTileColBd( ctuX ); } + bool ctuIsTileRowBd( int ctuY ) const { return ctuY == ctuToTileRowBd( ctuY ); } + uint32_t getTileIdx( uint32_t ctuX, uint32_t ctuY ) const { return (ctuToTileRow( ctuY ) * getNumTileColumns()) + ctuToTileCol( ctuX ); } + uint32_t getTileIdx( uint32_t ctuRsAddr) const { return getTileIdx( ctuRsAddr % m_picWidthInCtu, ctuRsAddr / m_picWidthInCtu ); } + uint32_t getTileIdx( const Position& pos ) const { return getTileIdx( pos.x / m_ctuSize, pos.y / m_ctuSize ); } + void setRectSliceFlag( bool b ) { m_rectSliceFlag = b; } + bool getRectSliceFlag( ) const { return m_rectSliceFlag; } + void setSingleSlicePerSubPicFlag( bool b ) { m_singleSlicePerSubPicFlag = b; } + bool getSingleSlicePerSubPicFlag( ) const { return m_singleSlicePerSubPicFlag; } + uint32_t getCtuToSubPicIdx( int idx ) const { CHECK( idx >= m_ctuToSubPicIdx.size(), "CTU address index exceeds valid range" ); return m_ctuToSubPicIdx[idx]; } + void setNumSlicesInPic( uint32_t u ) { CHECK( u > MAX_SLICES, "Number of slices in picture exceeds valid range" ); m_numSlicesInPic = u; } + uint32_t getNumSlicesInPic( ) const { return m_numSlicesInPic; } + void 
setTileIdxDeltaPresentFlag( bool b ) { m_tileIdxDeltaPresentFlag = b; } + bool getTileIdxDeltaPresentFlag( ) const { return m_tileIdxDeltaPresentFlag; } + void setSliceWidthInTiles( int idx, uint32_t u ) { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); m_rectSlices[idx].setSliceWidthInTiles( u ); } + uint32_t getSliceWidthInTiles( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_rectSlices[idx].getSliceWidthInTiles( ); } + void setSliceHeightInTiles( int idx, uint32_t u ) { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); m_rectSlices[idx].setSliceHeightInTiles( u ); } + uint32_t getSliceHeightInTiles( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_rectSlices[idx].getSliceHeightInTiles( ); } + void setNumSlicesInTile( int idx, uint32_t u ) { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); m_rectSlices[idx].setNumSlicesInTile( u ); } + uint32_t getNumSlicesInTile( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_rectSlices[idx].getNumSlicesInTile( ); } + void setSliceHeightInCtu( int idx, uint32_t u ) { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); m_rectSlices[idx].setSliceHeightInCtu( u ); } + uint32_t getSliceHeightInCtu( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_rectSlices[idx].getSliceHeightInCtu( ); } + void setSliceTileIdx( int idx, uint32_t u ) { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); m_rectSlices[idx].setTileIdx( u ); } + uint32_t getSliceTileIdx( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_rectSlices[idx].getTileIdx( ); } + void setRectSlices( std::vector<RectSlice> rectSlices ) { m_rectSlices = rectSlices; } + void setLoopFilterAcrossTilesEnabledFlag( bool b ) { m_loopFilterAcrossTilesEnabledFlag 
= b; } + bool getLoopFilterAcrossTilesEnabledFlag( ) const { return m_loopFilterAcrossTilesEnabledFlag; } + void setLoopFilterAcrossSlicesEnabledFlag( bool b ) { m_loopFilterAcrossSlicesEnabledFlag = b; } + bool getLoopFilterAcrossSlicesEnabledFlag( ) const { return m_loopFilterAcrossSlicesEnabledFlag; } + void resetTileSliceInfo(); + void initTiles(); + void initRectSlices(); + void initRectSliceMap(); + void initRasterSliceMap( std::vector<uint32_t> sizes ); + void checkSliceMap(); + SliceMap getSliceMap( int idx ) const { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" ); return m_sliceMap[idx]; } + + uint32_t getLog2MaxTransformSkipBlockSize() const { return m_log2MaxTransformSkipBlockSize; } + void setLog2MaxTransformSkipBlockSize(uint32_t u) { m_log2MaxTransformSkipBlockSize = u; } - bool getUseTransformSkip() const { return m_useTransformSkip; } - void setUseTransformSkip( bool b ) { m_useTransformSkip = b; } - -#if HEVC_TILES_WPP - void setLoopFilterAcrossTilesEnabledFlag(bool b) { m_loopFilterAcrossTilesEnabledFlag = b; } - bool getLoopFilterAcrossTilesEnabledFlag() const { return m_loopFilterAcrossTilesEnabledFlag; } -#endif -#if HEVC_DEPENDENT_SLICES - bool getDependentSliceSegmentsEnabledFlag() const { return m_dependentSliceSegmentsEnabledFlag; } - void setDependentSliceSegmentsEnabledFlag(bool val) { m_dependentSliceSegmentsEnabledFlag = val; } -#endif -#if HEVC_TILES_WPP bool getEntropyCodingSyncEnabledFlag() const { return m_entropyCodingSyncEnabledFlag; } void setEntropyCodingSyncEnabledFlag(bool val) { m_entropyCodingSyncEnabledFlag = val; } - void setTilesEnabledFlag(bool val) { m_tilesEnabledFlag = val; } - bool getTilesEnabledFlag() const { return m_tilesEnabledFlag; } - void setTileUniformSpacingFlag(bool b) { m_uniformSpacingFlag = b; } - bool getTileUniformSpacingFlag() const { return m_uniformSpacingFlag; } - void setNumTileColumnsMinus1(int i) { m_numTileColumnsMinus1 = i; } - int getNumTileColumnsMinus1() const { return 
m_numTileColumnsMinus1; } - void setTileColumnWidth(const std::vector<int>& columnWidth ) { m_tileColumnWidth = columnWidth; } - uint32_t getTileColumnWidth(uint32_t columnIdx) const { return m_tileColumnWidth[columnIdx]; } - void setNumTileRowsMinus1(int i) { m_numTileRowsMinus1 = i; } - int getNumTileRowsMinus1() const { return m_numTileRowsMinus1; } - void setTileRowHeight(const std::vector<int>& rowHeight) { m_tileRowHeight = rowHeight; } - uint32_t getTileRowHeight(uint32_t rowIdx) const { return m_tileRowHeight[rowIdx]; } -#endif + + bool getConstantSliceHeaderParamsEnabledFlag() const { return m_constantSliceHeaderParamsEnabledFlag; } + void setConstantSliceHeaderParamsEnabledFlag(bool b) { m_constantSliceHeaderParamsEnabledFlag = b; } + int getPPSDepQuantEnabledIdc() const { return m_PPSDepQuantEnabledIdc; } + void setPPSDepQuantEnabledIdc(int u) { m_PPSDepQuantEnabledIdc = u; } + int getPPSRefPicListSPSIdc( bool b ) const { return b==1 ? m_PPSRefPicListSPSIdc1: m_PPSRefPicListSPSIdc0; } + int getPPSRefPicListSPSIdc0() const { return m_PPSRefPicListSPSIdc0; } + void setPPSRefPicListSPSIdc0(int u) { m_PPSRefPicListSPSIdc0 = u; } + int getPPSRefPicListSPSIdc1() const { return m_PPSRefPicListSPSIdc1; } + void setPPSRefPicListSPSIdc1(int u) { m_PPSRefPicListSPSIdc1 = u; } + int getPPSMvdL1ZeroIdc() const { return m_PPSMvdL1ZeroIdc; } + void setPPSMvdL1ZeroIdc(int u) { m_PPSMvdL1ZeroIdc = u; } + int getPPSCollocatedFromL0Idc() const { return m_PPSCollocatedFromL0Idc; } + void setPPSCollocatedFromL0Idc(int u) { m_PPSCollocatedFromL0Idc = u; } + uint32_t getPPSSixMinusMaxNumMergeCandPlus1() const { return m_PPSSixMinusMaxNumMergeCandPlus1; } + void setPPSSixMinusMaxNumMergeCandPlus1(uint32_t u) { m_PPSSixMinusMaxNumMergeCandPlus1 = u; } + uint32_t getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() const { return m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; } + void setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(uint32_t u) { 
m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = u; } void setCabacInitPresentFlag( bool flag ) { m_cabacInitPresentFlag = flag; } bool getCabacInitPresentFlag() const { return m_cabacInitPresentFlag; } @@ -1646,32 +1731,41 @@ public: int getDeblockingFilterBetaOffsetDiv2() const { return m_deblockingFilterBetaOffsetDiv2; } //!< get beta offset for deblocking filter void setDeblockingFilterTcOffsetDiv2(int val) { m_deblockingFilterTcOffsetDiv2 = val; } //!< set tc offset for deblocking filter int getDeblockingFilterTcOffsetDiv2() const { return m_deblockingFilterTcOffsetDiv2; } //!< get tc offset for deblocking filter -#if HEVC_USE_SCALING_LISTS - bool getScalingListPresentFlag() const { return m_scalingListPresentFlag; } - void setScalingListPresentFlag( bool b ) { m_scalingListPresentFlag = b; } - ScalingList& getScalingList() { return m_scalingList; } - const ScalingList& getScalingList() const { return m_scalingList; } -#endif bool getListsModificationPresentFlag() const { return m_listsModificationPresentFlag; } void setListsModificationPresentFlag( bool b ) { m_listsModificationPresentFlag = b; } - uint32_t getLog2ParallelMergeLevelMinus2() const { return m_log2ParallelMergeLevelMinus2; } - void setLog2ParallelMergeLevelMinus2(uint32_t mrgLevel) { m_log2ParallelMergeLevelMinus2 = mrgLevel; } - int getNumExtraSliceHeaderBits() const { return m_numExtraSliceHeaderBits; } - void setNumExtraSliceHeaderBits(int i) { m_numExtraSliceHeaderBits = i; } - void setLoopFilterAcrossSlicesEnabledFlag( bool bValue ) { m_loopFilterAcrossSlicesEnabledFlag = bValue; } - bool getLoopFilterAcrossSlicesEnabledFlag() const { return m_loopFilterAcrossSlicesEnabledFlag; } + bool getPictureHeaderExtensionPresentFlag() const { return m_pictureHeaderExtensionPresentFlag; } + void setPictureHeaderExtensionPresentFlag(bool val) { m_pictureHeaderExtensionPresentFlag = val; } bool getSliceHeaderExtensionPresentFlag() const { return m_sliceHeaderExtensionPresentFlag; } void 
setSliceHeaderExtensionPresentFlag(bool val) { m_sliceHeaderExtensionPresentFlag = val; } + const PPSRExt& getPpsRangeExtension() const { return m_ppsRangeExtension; } PPSRExt& getPpsRangeExtension() { return m_ppsRangeExtension; } + + void setPicWidthInLumaSamples( uint32_t u ) { m_picWidthInLumaSamples = u; } + uint32_t getPicWidthInLumaSamples() const { return m_picWidthInLumaSamples; } + void setPicHeightInLumaSamples( uint32_t u ) { m_picHeightInLumaSamples = u; } + uint32_t getPicHeightInLumaSamples() const { return m_picHeightInLumaSamples; } + + Window& getConformanceWindow() { return m_conformanceWindow; } + const Window& getConformanceWindow() const { return m_conformanceWindow; } + void setConformanceWindow( Window& conformanceWindow ) { m_conformanceWindow = conformanceWindow; } + + Window& getScalingWindow() { return m_scalingWindow; } + const Window& getScalingWindow() const { return m_scalingWindow; } + void setScalingWindow( Window& scalingWindow ) { m_scalingWindow = scalingWindow; } }; class APS { private: int m_APSId; // adaptation_parameter_set_id - AlfSliceParam m_alfAPSParam; + int m_temporalId; + int m_layerId; + ApsType m_APSType; // aps_params_type + AlfParam m_alfAPSParam; + SliceReshapeInfo m_reshapeAPSInfo; + ScalingList m_scalingListApsInfo; public: APS(); @@ -1680,8 +1774,20 @@ public: int getAPSId() const { return m_APSId; } void setAPSId(int i) { m_APSId = i; } - void setAlfAPSParam(AlfSliceParam& alfAPSParam) { m_alfAPSParam = alfAPSParam; } - const AlfSliceParam& getAlfAPSParam() const { return m_alfAPSParam; } + ApsType getAPSType() const { return m_APSType; } + void setAPSType( ApsType type ) { m_APSType = type; } + + void setAlfAPSParam(AlfParam& alfAPSParam) { m_alfAPSParam = alfAPSParam; } + void setTemporalId( int i ) { m_temporalId = i; } + int getTemporalId() const { return m_temporalId; } + void setLayerId( int i ) { m_layerId = i; } + int getLayerId() const { return m_layerId; } + AlfParam& getAlfAPSParam() { return 
m_alfAPSParam; } + + void setReshaperAPSInfo(SliceReshapeInfo& reshapeAPSInfo) { m_reshapeAPSInfo = reshapeAPSInfo; } + SliceReshapeInfo& getReshaperAPSInfo() { return m_reshapeAPSInfo; } + void setScalingList( ScalingList& scalingListAPSInfo ) { m_scalingListApsInfo = scalingListAPSInfo; } + ScalingList& getScalingList() { return m_scalingListApsInfo; } }; struct WPScalingParam { @@ -1706,7 +1812,245 @@ struct WPACDCParam int64_t iDC; }; +// picture header class +class PicHeader +{ +private: + bool m_valid; //!< picture header is valid yet or not + Picture* m_pcPic; //!< pointer to picture structure + bool m_nonReferencePictureFlag; //!< non-reference picture flag + bool m_gdrPicFlag; //!< gradual decoding refresh picture flag + bool m_noOutputOfPriorPicsFlag; //!< no output of prior pictures flag + uint32_t m_recoveryPocCnt; //!< recovery POC count + int m_spsId; //!< sequence parameter set ID + int m_ppsId; //!< picture parameter set ID + bool m_subPicIdSignallingPresentFlag; //!< indicates the presence of sub-picture ID signalling in the SPS + uint32_t m_subPicIdLen; //!< sub-picture ID length in bits + uint8_t m_subPicId[MAX_NUM_SUB_PICS]; //!< sub-picture ID for each sub-picture in the sequence + bool m_loopFilterAcrossVirtualBoundariesDisabledFlag; //!< loop filtering across virtual boundaries disabled + unsigned m_numVerVirtualBoundaries; //!< number of vertical virtual boundaries + unsigned m_numHorVirtualBoundaries; //!< number of horizontal virtual boundaries + unsigned m_virtualBoundariesPosX[3]; //!< horizontal virtual boundary positions + unsigned m_virtualBoundariesPosY[3]; //!< vertical virtual boundary positions + unsigned m_colourPlaneId; //!< 4:4:4 colour plane ID + bool m_picOutputFlag; //!< picture output flag + bool m_picRplPresentFlag; //!< reference lists present in picture header or not + const ReferencePictureList* m_pRPL0; //!< pointer to RPL for L0, either in the SPS or the local RPS in the picture header + const ReferencePictureList* 
m_pRPL1; //!< pointer to RPL for L1, either in the SPS or the local RPS in the picture header + ReferencePictureList m_localRPL0; //!< RPL for L0 when present in picture header + ReferencePictureList m_localRPL1; //!< RPL for L1 when present in picture header + int m_rpl0Idx; //!< index of used RPL in the SPS or -1 for local RPL in the picture header + int m_rpl1Idx; //!< index of used RPL in the SPS or -1 for local RPL in the picture header + bool m_splitConsOverrideFlag; //!< partitioning constraint override flag + uint32_t m_cuQpDeltaSubdivIntra; //!< CU QP delta maximum subdivision for intra slices + uint32_t m_cuQpDeltaSubdivInter; //!< CU QP delta maximum subdivision for inter slices + uint32_t m_cuChromaQpOffsetSubdivIntra; //!< CU chroma QP offset maximum subdivision for intra slices + uint32_t m_cuChromaQpOffsetSubdivInter; //!< CU chroma QP offset maximum subdivision for inter slices + bool m_enableTMVPFlag; //!< enable temporal motion vector prediction + bool m_mvdL1ZeroFlag; //!< L1 MVD set to zero flag + uint32_t m_maxNumMergeCand; //!< max number of merge candidates + uint32_t m_maxNumAffineMergeCand; //!< max number of sub-block merge candidates + bool m_disFracMMVD; //!< fractional MMVD offsets disabled flag + bool m_disBdofFlag; //!< picture level BDOF disable flag + bool m_disDmvrFlag; //!< picture level DMVR disable flag + bool m_disProfFlag; //!< picture level PROF disable flag + uint32_t m_maxNumTriangleCand; //!< max number of triangle merge candidates + uint32_t m_maxNumIBCMergeCand; //!< max number of IBC merge candidates + bool m_jointCbCrSignFlag; //!< joint Cb/Cr residual sign flag + bool m_saoEnabledPresentFlag; //!< sao enabled flags present in the picture header + bool m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE]; //!< sao enabled flags for each channel + bool m_alfEnabledPresentFlag; //!< alf enabled flags present in the picture header + bool m_alfEnabledFlag[MAX_NUM_COMPONENT]; //!< alf enabled flags for each component + int m_numAlfAps; 
//!< number of alf aps active for the picture + std::vector<int> m_alfApsId; //!< list of alf aps for the picture + int m_alfChromaApsId; //!< chroma alf aps ID + bool m_depQuantEnabledFlag; //!< dependent quantization enabled flag + bool m_signDataHidingEnabledFlag; //!< sign data hiding enabled flag + bool m_deblockingFilterOverridePresentFlag; //!< deblocking filter override controls present in picture header + bool m_deblockingFilterOverrideFlag; //!< deblocking filter override controls enabled + bool m_deblockingFilterDisable; //!< deblocking filter disabled flag + int m_deblockingFilterBetaOffsetDiv2; //!< beta offset for deblocking filter + int m_deblockingFilterTcOffsetDiv2; //!< tc offset for deblocking filter + bool m_lmcsEnabledFlag; //!< lmcs enabled flag + int m_lmcsApsId; //!< lmcs APS ID + APS* m_lmcsAps; //!< lmcs APS + bool m_lmcsChromaResidualScaleFlag; //!< lmcs chroma residual scale flag + bool m_scalingListPresentFlag; //!< quantization scaling lists present + int m_scalingListApsId; //!< quantization scaling list APS ID + APS* m_scalingListAps; //!< quantization scaling list APS + unsigned m_minQT[3]; //!< minimum quad-tree size 0: I slice luma; 1: P/B slice; 2: I slice chroma + unsigned m_maxMTTHierarchyDepth[3]; //!< maximum MTT depth + unsigned m_maxBTSize[3]; //!< maximum BT size + unsigned m_maxTTSize[3]; //!< maximum TT size +public: + PicHeader(); + virtual ~PicHeader(); + void initPicHeader(); + bool isValid() { return m_valid; } + void setValid() { m_valid = true; } + void setPic( Picture* p ) { m_pcPic = p; } + Picture* getPic() { return m_pcPic; } + const Picture* getPic() const { return m_pcPic; } + void setNonReferencePictureFlag( bool b ) { m_nonReferencePictureFlag = b; } + bool getNonReferencePictureFlag() const { return m_nonReferencePictureFlag; } + void setGdrPicFlag( bool b ) { m_gdrPicFlag = b; } + bool getGdrPicFlag() const { return m_gdrPicFlag; } + void setNoOutputOfPriorPicsFlag( bool b ) { m_noOutputOfPriorPicsFlag = 
b; } + bool getNoOutputOfPriorPicsFlag() const { return m_noOutputOfPriorPicsFlag; } + void setRecoveryPocCnt( uint32_t u ) { m_recoveryPocCnt = u; } + uint32_t getRecoveryPocCnt() const { return m_recoveryPocCnt; } /* returns the stored count itself, same type as the member */ + void setSPSId( uint32_t u ) { m_spsId = u; } + uint32_t getSPSId() const { return m_spsId; } + void setPPSId( uint32_t u ) { m_ppsId = u; } + uint32_t getPPSId() const { return m_ppsId; } + void setSubPicIdSignallingPresentFlag( bool b ) { m_subPicIdSignallingPresentFlag = b; } + bool getSubPicIdSignallingPresentFlag() const { return m_subPicIdSignallingPresentFlag; } + void setSubPicIdLen( uint32_t u ) { m_subPicIdLen = u; } + uint32_t getSubPicIdLen() const { return m_subPicIdLen; } + void setSubPicId( int i, uint8_t u ) { /* index is signed: reject negatives as well as overflow */ CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-pic index exceeds valid range" ); m_subPicId[i] = u; } + uint8_t getSubPicId( int i ) const { /* index is signed: reject negatives as well as overflow */ CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-pic index exceeds valid range" ); return m_subPicId[i]; } + void setLoopFilterAcrossVirtualBoundariesDisabledFlag(bool b) { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b; } + bool getLoopFilterAcrossVirtualBoundariesDisabledFlag() const { return m_loopFilterAcrossVirtualBoundariesDisabledFlag; } + void setNumVerVirtualBoundaries(unsigned u) { m_numVerVirtualBoundaries = u; } + unsigned getNumVerVirtualBoundaries() const { return m_numVerVirtualBoundaries; } + void setNumHorVirtualBoundaries(unsigned u) { m_numHorVirtualBoundaries = u; } + unsigned getNumHorVirtualBoundaries() const { return m_numHorVirtualBoundaries; } + void setVirtualBoundariesPosX(unsigned u, unsigned idx) { CHECK( idx >= 3, "boundary index exceeds valid range" ); m_virtualBoundariesPosX[idx] = u; } + unsigned getVirtualBoundariesPosX(unsigned idx) const { CHECK( idx >= 3, "boundary index exceeds valid range" ); return m_virtualBoundariesPosX[idx];} + void setVirtualBoundariesPosY(unsigned u, unsigned idx) { CHECK( idx >= 3, "boundary index exceeds valid range" ); m_virtualBoundariesPosY[idx] = u; 
} + unsigned getVirtualBoundariesPosY(unsigned idx) const { CHECK( idx >= 3, "boundary index exceeds valid range" ); return m_virtualBoundariesPosY[idx];} + void setColourPlaneId(unsigned u) { m_colourPlaneId = u; } + unsigned getColourPlaneId() const { return m_colourPlaneId; } + void setPicOutputFlag( bool b ) { m_picOutputFlag = b; } + bool getPicOutputFlag() const { return m_picOutputFlag; } + void setPicRplPresentFlag( bool b ) { m_picRplPresentFlag = b; } + bool getPicRplPresentFlag() const { return m_picRplPresentFlag; } + void setRPL( bool b, const ReferencePictureList *pcRPL) { if(b==1) { m_pRPL1 = pcRPL; } else { m_pRPL0 = pcRPL; } } + const ReferencePictureList* getRPL( bool b ) { return b==1 ? m_pRPL1 : m_pRPL0; } + ReferencePictureList* getLocalRPL( bool b ) { return b==1 ? &m_localRPL1 : &m_localRPL0; } + void setRPLIdx( bool b, int rplIdx) { if(b==1) { m_rpl1Idx = rplIdx; } else { m_rpl0Idx = rplIdx; } } + int getRPLIdx( bool b ) const { return b==1 ? m_rpl1Idx : m_rpl0Idx; } + void setRPL0(const ReferencePictureList *pcRPL) { m_pRPL0 = pcRPL; } + void setRPL1(const ReferencePictureList *pcRPL) { m_pRPL1 = pcRPL; } + const ReferencePictureList* getRPL0() { return m_pRPL0; } + const ReferencePictureList* getRPL1() { return m_pRPL1; } + ReferencePictureList* getLocalRPL0() { return &m_localRPL0; } + ReferencePictureList* getLocalRPL1() { return &m_localRPL1; } + void setRPL0idx(int rplIdx) { m_rpl0Idx = rplIdx; } + void setRPL1idx(int rplIdx) { m_rpl1Idx = rplIdx; } + int getRPL0idx() const { return m_rpl0Idx; } + int getRPL1idx() const { return m_rpl1Idx; } + void setSplitConsOverrideFlag( bool b ) { m_splitConsOverrideFlag = b; } + bool getSplitConsOverrideFlag() const { return m_splitConsOverrideFlag; } + void setCuQpDeltaSubdivIntra( uint32_t u ) { m_cuQpDeltaSubdivIntra = u; } + uint32_t getCuQpDeltaSubdivIntra() const { return m_cuQpDeltaSubdivIntra; } + void setCuQpDeltaSubdivInter( uint32_t u ) { m_cuQpDeltaSubdivInter = u; } + uint32_t 
getCuQpDeltaSubdivInter() const { return m_cuQpDeltaSubdivInter; } + void setCuChromaQpOffsetSubdivIntra( uint32_t u ) { m_cuChromaQpOffsetSubdivIntra = u; } + uint32_t getCuChromaQpOffsetSubdivIntra() const { return m_cuChromaQpOffsetSubdivIntra; } + void setCuChromaQpOffsetSubdivInter( uint32_t u ) { m_cuChromaQpOffsetSubdivInter = u; } + uint32_t getCuChromaQpOffsetSubdivInter() const { return m_cuChromaQpOffsetSubdivInter; } + void setEnableTMVPFlag( bool b ) { m_enableTMVPFlag = b; } + bool getEnableTMVPFlag() const { return m_enableTMVPFlag; } + void setMvdL1ZeroFlag( bool b ) { m_mvdL1ZeroFlag = b; } + bool getMvdL1ZeroFlag() const { return m_mvdL1ZeroFlag; } + void setMaxNumMergeCand(uint32_t val ) { m_maxNumMergeCand = val; } + uint32_t getMaxNumMergeCand() const { return m_maxNumMergeCand; } + void setMaxNumAffineMergeCand( uint32_t val ) { m_maxNumAffineMergeCand = val; } + uint32_t getMaxNumAffineMergeCand() const { return m_maxNumAffineMergeCand; } + void setDisFracMMVD( bool val ) { m_disFracMMVD = val; } + bool getDisFracMMVD() const { return m_disFracMMVD; } + void setDisBdofFlag( bool val ) { m_disBdofFlag = val; } + bool getDisBdofFlag() const { return m_disBdofFlag; } + void setDisDmvrFlag( bool val ) { m_disDmvrFlag = val; } + bool getDisDmvrFlag() const { return m_disDmvrFlag; } + void setDisProfFlag( bool val ) { m_disProfFlag = val; } + bool getDisProfFlag() const { return m_disProfFlag; } + void setMaxNumTriangleCand(uint32_t b) { m_maxNumTriangleCand = b; } + uint32_t getMaxNumTriangleCand() const { return m_maxNumTriangleCand; } + void setMaxNumIBCMergeCand( uint32_t b ) { m_maxNumIBCMergeCand = b; } + uint32_t getMaxNumIBCMergeCand() const { return m_maxNumIBCMergeCand; } + void setJointCbCrSignFlag( bool b ) { m_jointCbCrSignFlag = b; } + bool getJointCbCrSignFlag() const { return m_jointCbCrSignFlag; } + void setSaoEnabledPresentFlag( bool b ) { m_saoEnabledPresentFlag = b; } + bool getSaoEnabledPresentFlag() const { return 
m_saoEnabledPresentFlag; } + void setSaoEnabledFlag(ChannelType chType, bool b) { m_saoEnabledFlag[chType] = b; } + bool getSaoEnabledFlag(ChannelType chType) const { return m_saoEnabledFlag[chType]; } + void setAlfEnabledPresentFlag( bool b ) { m_alfEnabledPresentFlag = b; } + bool getAlfEnabledPresentFlag() const { return m_alfEnabledPresentFlag; } + void setAlfEnabledFlag(ComponentID compId, bool b) { m_alfEnabledFlag[compId] = b; } + bool getAlfEnabledFlag(ComponentID compId) const { return m_alfEnabledFlag[compId]; } + void setNumAlfAps(int i) { m_numAlfAps = i; } + int getNumAlfAps() const { return m_numAlfAps; } + void setAlfApsIdChroma(int i) { m_alfChromaApsId = i; } + int getAlfApsIdChroma() const { return m_alfChromaApsId; } + void setDepQuantEnabledFlag( bool b ) { m_depQuantEnabledFlag = b; } + bool getDepQuantEnabledFlag() const { return m_depQuantEnabledFlag; } + void setSignDataHidingEnabledFlag( bool b ) { m_signDataHidingEnabledFlag = b; } + bool getSignDataHidingEnabledFlag() const { return m_signDataHidingEnabledFlag; } + void setDeblockingFilterOverridePresentFlag( bool b ) { m_deblockingFilterOverridePresentFlag = b; } + bool getDeblockingFilterOverridePresentFlag() const { return m_deblockingFilterOverridePresentFlag; } + void setDeblockingFilterOverrideFlag( bool b ) { m_deblockingFilterOverrideFlag = b; } + bool getDeblockingFilterOverrideFlag() const { return m_deblockingFilterOverrideFlag; } + void setDeblockingFilterDisable( bool b ) { m_deblockingFilterDisable= b; } + bool getDeblockingFilterDisable() const { return m_deblockingFilterDisable; } + void setDeblockingFilterBetaOffsetDiv2( int i ) { m_deblockingFilterBetaOffsetDiv2 = i; } + int getDeblockingFilterBetaOffsetDiv2()const { return m_deblockingFilterBetaOffsetDiv2; } + void setDeblockingFilterTcOffsetDiv2( int i ) { m_deblockingFilterTcOffsetDiv2 = i; } + int getDeblockingFilterTcOffsetDiv2() const { return m_deblockingFilterTcOffsetDiv2; } + void setLmcsEnabledFlag(bool b) { 
m_lmcsEnabledFlag = b; } + bool getLmcsEnabledFlag() { return m_lmcsEnabledFlag; } + const bool getLmcsEnabledFlag() const { return m_lmcsEnabledFlag; } + void setLmcsAPS(APS* aps) { m_lmcsAps = aps; m_lmcsApsId = (aps) ? aps->getAPSId() : -1; } + APS* getLmcsAPS() const { return m_lmcsAps; } + void setLmcsAPSId(int id) { m_lmcsApsId = id; } + int getLmcsAPSId() const { return m_lmcsApsId; } + void setLmcsChromaResidualScaleFlag(bool b) { m_lmcsChromaResidualScaleFlag = b; } + bool getLmcsChromaResidualScaleFlag() { return m_lmcsChromaResidualScaleFlag; } + const bool getLmcsChromaResidualScaleFlag() const { return m_lmcsChromaResidualScaleFlag; } + void setScalingListAPS( APS* aps ) { m_scalingListAps = aps; m_scalingListApsId = ( aps ) ? aps->getAPSId() : -1; } + APS* getScalingListAPS() const { return m_scalingListAps; } + void setScalingListAPSId( int id ) { m_scalingListApsId = id; } + int getScalingListAPSId() const { return m_scalingListApsId; } + void setScalingListPresentFlag( bool b ) { m_scalingListPresentFlag = b; } + bool getScalingListPresentFlag() { return m_scalingListPresentFlag; } + const bool getScalingListPresentFlag() const { return m_scalingListPresentFlag; } + + unsigned* getMinQTSizes() const { return (unsigned *)m_minQT; } + unsigned* getMaxMTTHierarchyDepths() const { return (unsigned *)m_maxMTTHierarchyDepth; } + unsigned* getMaxBTSizes() const { return (unsigned *)m_maxBTSize; } + unsigned* getMaxTTSizes() const { return (unsigned *)m_maxTTSize; } + + void setMinQTSize(unsigned idx, unsigned minQT) { m_minQT[idx] = minQT; } + void setMaxMTTHierarchyDepth(unsigned idx, unsigned maxMTT) { m_maxMTTHierarchyDepth[idx] = maxMTT; } + void setMaxBTSize(unsigned idx, unsigned maxBT) { m_maxBTSize[idx] = maxBT; } + void setMaxTTSize(unsigned idx, unsigned maxTT) { m_maxTTSize[idx] = maxTT; } + + void setMinQTSizes(unsigned* minQT) { m_minQT[0] = minQT[0]; m_minQT[1] = minQT[1]; m_minQT[2] = minQT[2]; } + void setMaxMTTHierarchyDepths(unsigned* 
maxMTT) { m_maxMTTHierarchyDepth[0] = maxMTT[0]; m_maxMTTHierarchyDepth[1] = maxMTT[1]; m_maxMTTHierarchyDepth[2] = maxMTT[2]; } + void setMaxBTSizes(unsigned* maxBT) { m_maxBTSize[0] = maxBT[0]; m_maxBTSize[1] = maxBT[1]; m_maxBTSize[2] = maxBT[2]; } + void setMaxTTSizes(unsigned* maxTT) { m_maxTTSize[0] = maxTT[0]; m_maxTTSize[1] = maxTT[1]; m_maxTTSize[2] = maxTT[2]; } + + unsigned getMinQTSize(SliceType slicetype, + ChannelType chType = CHANNEL_TYPE_LUMA) const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? m_minQT[0] : m_minQT[2]) : m_minQT[1]; } + unsigned getMaxMTTHierarchyDepth(SliceType slicetype, + ChannelType chType = CHANNEL_TYPE_LUMA) const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? m_maxMTTHierarchyDepth[0] : m_maxMTTHierarchyDepth[2]) : m_maxMTTHierarchyDepth[1]; } + unsigned getMaxBTSize(SliceType slicetype, + ChannelType chType = CHANNEL_TYPE_LUMA) const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? m_maxBTSize[0] : m_maxBTSize[2]) : m_maxBTSize[1]; } + unsigned getMaxTTSize(SliceType slicetype, + ChannelType chType = CHANNEL_TYPE_LUMA) const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? 
m_maxTTSize[0] : m_maxTTSize[2]) : m_maxTTSize[1]; } + + void setAlfAPSs(std::vector<int> apsIDs) { m_alfApsId.resize(m_numAlfAps); + for (int i = 0; i < m_numAlfAps; i++) + { + m_alfApsId[i] = apsIDs[i]; + } + } + + std::vector<int> getAlfAPSs() const { return m_alfApsId; } + +}; /// slice header class class Slice @@ -1715,23 +2059,24 @@ class Slice private: // Bitstream writing bool m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE]; - int m_iPPSId; ///< picture parameter set ID - bool m_PicOutputFlag; ///< pic_output_flag int m_iPOC; int m_iLastIDR; int m_iAssociatedIRAP; NalUnitType m_iAssociatedIRAPType; - const ReferencePictureSet* m_pRPS; //< pointer to RPS, either in the SPS or the local RPS in the same slice header - ReferencePictureSet m_localRPS; //< RPS when present in slice header - int m_rpsIdx; //< index of used RPS in the SPS or -1 for local RPS in the slice header - RefPicListModification m_RefPicListModification; + bool m_enableDRAPSEI; + bool m_useLTforDRAP; + bool m_isDRAP; + int m_latestDRAPPOC; + const ReferencePictureList* m_pRPL0; //< pointer to RPL for L0, either in the SPS or the local RPS in the same slice header + const ReferencePictureList* m_pRPL1; //< pointer to RPL for L1, either in the SPS or the local RPS in the same slice header + ReferencePictureList m_localRPL0; //< RPL for L0 when present in slice header + ReferencePictureList m_localRPL1; //< RPL for L1 when present in slice header + int m_rpl0Idx; //< index of used RPL in the SPS or -1 for local RPL in the slice header + int m_rpl1Idx; //< index of used RPL in the SPS or -1 for local RPL in the slice header NalUnitType m_eNalUnitType; ///< Nal unit type for the slice SliceType m_eSliceType; int m_iSliceQp; int m_iSliceQpBase; -#if HEVC_DEPENDENT_SLICES - bool m_dependentSliceSegmentFlag; -#endif bool m_ChromaQpAdjEnabled; bool m_deblockingFilterDisable; bool m_deblockingFilterOverrideFlag; //< offsets for deblocking filter inherit from PPS @@ -1741,10 +2086,6 @@ private: int 
m_aiNumRefIdx [NUM_REF_PIC_LIST_01]; // for multiple reference of current slice bool m_pendingRasInit; - bool m_depQuantEnabledFlag; -#if HEVC_USE_SIGN_HIDING - bool m_signDataHidingEnabledFlag; -#endif bool m_bCheckLDC; bool m_biDirPred; @@ -1752,127 +2093,103 @@ private: // Data int m_iSliceQpDelta; - int m_iSliceChromaQpDelta[MAX_NUM_COMPONENT]; + int m_iSliceChromaQpDelta[MAX_NUM_COMPONENT+1]; Picture* m_apcRefPicList [NUM_REF_PIC_LIST_01][MAX_NUM_REF+1]; int m_aiRefPOCList [NUM_REF_PIC_LIST_01][MAX_NUM_REF+1]; bool m_bIsUsedAsLongTerm[NUM_REF_PIC_LIST_01][MAX_NUM_REF+1]; int m_iDepth; + Picture* m_scaledRefPicList[NUM_REF_PIC_LIST_01][MAX_NUM_REF + 1]; + Picture* m_savedRefPicList[NUM_REF_PIC_LIST_01][MAX_NUM_REF + 1]; + std::pair<int, int> m_scalingRatio[NUM_REF_PIC_LIST_01][MAX_NUM_REF_PICS]; // access channel -#if HEVC_VPS + const DPS* m_dps; const VPS* m_pcVPS; -#endif const SPS* m_pcSPS; const PPS* m_pcPPS; Picture* m_pcPic; + const PicHeader* m_pcPicHeader; //!< pointer to picture header structure bool m_colFromL0Flag; // collocated picture from List0 flag - bool m_noOutputPriorPicsFlag; - bool m_noRaslOutputFlag; + bool m_noIncorrectPicOutputFlag; bool m_handleCraAsCvsStartFlag; uint32_t m_colRefIdx; - uint32_t m_maxNumMergeCand; - uint32_t m_maxNumAffineMergeCand; - bool m_disFracMMVD; double m_lambdas[MAX_NUM_COMPONENT]; bool m_abEqualRef [NUM_REF_PIC_LIST_01][MAX_NUM_REF][MAX_NUM_REF]; uint32_t m_uiTLayer; bool m_bTLayerSwitchingFlag; - SliceConstraint m_sliceMode; - uint32_t m_sliceArgument; - uint32_t m_sliceCurStartCtuTsAddr; - uint32_t m_sliceCurEndCtuTsAddr; + SliceMap m_sliceMap; //!< list of CTUs in current slice - raster scan CTU addresses uint32_t m_independentSliceIdx; -#if HEVC_DEPENDENT_SLICES - uint32_t m_sliceSegmentIdx; - SliceConstraint m_sliceSegmentMode; - uint32_t m_sliceSegmentArgument; - uint32_t m_sliceSegmentCurStartCtuTsAddr; - uint32_t m_sliceSegmentCurEndCtuTsAddr; -#endif bool m_nextSlice; -#if HEVC_DEPENDENT_SLICES - bool 
m_nextSliceSegment; -#endif uint32_t m_sliceBits; -#if HEVC_DEPENDENT_SLICES - uint32_t m_sliceSegmentBits; -#endif bool m_bFinalized; + bool m_bTestWeightPred; bool m_bTestWeightBiPred; WPScalingParam m_weightPredTable[NUM_REF_PIC_LIST_01][MAX_NUM_REF][MAX_NUM_COMPONENT]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V] WPACDCParam m_weightACDCParam[MAX_NUM_COMPONENT]; ClpRngs m_clpRngs; std::vector<uint32_t> m_substreamSizes; + uint32_t m_numEntryPoints; bool m_cabacInitFlag; - bool m_bLMvdL1Zero; -#if !JVET_M0101_HLS - bool m_temporalLayerNonReferenceFlag; -#endif - bool m_LFCrossSliceBoundaryFlag; + uint32_t m_sliceSubPicId; - bool m_enableTMVPFlag; + + SliceType m_encCABACTableIdx; // Used to transmit table selection across slices. clock_t m_iProcessingStartTime; double m_dProcessingTime; - bool m_splitConsOverrideFlag; - uint32_t m_uiMinQTSize; - uint32_t m_uiMaxBTDepth; - uint32_t m_uiMaxTTSize; - - uint32_t m_uiMinQTSizeIChroma; - uint32_t m_uiMaxBTDepthIChroma; - uint32_t m_uiMaxBTSizeIChroma; - uint32_t m_uiMaxTTSizeIChroma; - uint32_t m_uiMaxBTSize; - - int m_apsId; - APS* m_aps; - bool m_tileGroupAlfEnabledFlag; - SliceReshapeInfo m_sliceReshapeInfo; + + int m_rpPicOrderCntVal; + APS* m_alfApss[ALF_CTB_MAX_NUM_APS]; + bool m_tileGroupAlfEnabledFlag[MAX_NUM_COMPONENT]; + int m_tileGroupNumAps; + std::vector<int> m_tileGroupLumaApsId; + int m_tileGroupChromaApsId; + bool m_disableSATDForRd; public: Slice(); virtual ~Slice(); void initSlice(); + void inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SPS *sps ); + void setPicHeader( const PicHeader* pcPicHeader ) { m_pcPicHeader = pcPicHeader; } + const PicHeader* getPicHeader() const { return m_pcPicHeader; } int getRefIdx4MVPair( RefPicList eCurRefPicList, int nCurRefIdx ); -#if HEVC_VPS - void setVPS( VPS* pcVPS ) { m_pcVPS = pcVPS; } - const VPS* getVPS() const { return m_pcVPS; } -#endif + void setDPS( DPS* dps ) { m_dps = dps; } + const DPS* getDPS() const { return m_dps; } 
+ void setSPS( const SPS* pcSPS ) { m_pcSPS = pcSPS; } const SPS* getSPS() const { return m_pcSPS; } + void setVPS( const VPS* pcVPS ) { m_pcVPS = pcVPS; } + const VPS* getVPS() const { return m_pcVPS; } - void setPPS( const PPS* pcPPS ) { m_pcPPS = pcPPS; m_iPPSId = (pcPPS) ? pcPPS->getPPSId() : -1; } + void setPPS( const PPS* pcPPS ) { m_pcPPS = pcPPS; } const PPS* getPPS() const { return m_pcPPS; } - void setPPSId( int PPSId ) { m_iPPSId = PPSId; } - int getPPSId() const { return m_iPPSId; } - void setAPS(APS* aps) { m_aps = aps; m_apsId = (aps) ? aps->getAPSId() : -1; } - APS* getAPS() { return m_aps; } - void setAPSId(int apsId) { m_apsId = apsId; } - int getAPSId() const { return m_apsId; } - void setPicOutputFlag( bool b ) { m_PicOutputFlag = b; } - bool getPicOutputFlag() const { return m_PicOutputFlag; } + void setAlfAPSs(APS** apss) { memcpy(m_alfApss, apss, sizeof(m_alfApss)); } + APS** getAlfAPSs() { return m_alfApss; } void setSaoEnabledFlag(ChannelType chType, bool s) {m_saoEnabledFlag[chType] =s; } bool getSaoEnabledFlag(ChannelType chType) const { return m_saoEnabledFlag[chType]; } - void setRPS( const ReferencePictureSet *pcRPS ) { m_pRPS = pcRPS; } - const ReferencePictureSet* getRPS() { return m_pRPS; } - ReferencePictureSet* getLocalRPS() { return &m_localRPS; } - - void setRPSidx( int rpsIdx ) { m_rpsIdx = rpsIdx; } - int getRPSidx() const { return m_rpsIdx; } - RefPicListModification* getRefPicListModification() { return &m_RefPicListModification; } + void setRPL0(const ReferencePictureList *pcRPL) { m_pRPL0 = pcRPL; } + void setRPL1(const ReferencePictureList *pcRPL) { m_pRPL1 = pcRPL; } + const ReferencePictureList* getRPL0() { return m_pRPL0; } + const ReferencePictureList* getRPL1() { return m_pRPL1; } + ReferencePictureList* getLocalRPL0() { return &m_localRPL0; } + ReferencePictureList* getLocalRPL1() { return &m_localRPL1; } + void setRPL0idx(int rplIdx) { m_rpl0Idx = rplIdx; } + void setRPL1idx(int rplIdx) { m_rpl1Idx = rplIdx; } + int 
getRPL0idx() const { return m_rpl0Idx; } + int getRPL1idx() const { return m_rpl1Idx; } void setLastIDR(int iIDRPOC) { m_iLastIDR = iIDRPOC; } int getLastIDR() const { return m_iLastIDR; } void setAssociatedIRAPPOC(int iAssociatedIRAPPOC) { m_iAssociatedIRAP = iAssociatedIRAPPOC; } @@ -1883,10 +2200,6 @@ public: int getPOC() const { return m_iPOC; } int getSliceQp() const { return m_iSliceQp; } bool getUseWeightedPrediction() const { return( (m_eSliceType==P_SLICE && testWeightPred()) || (m_eSliceType==B_SLICE && testWeightBiPred()) ); } -#if HEVC_DEPENDENT_SLICES - bool getDependentSliceSegmentFlag() const { return m_dependentSliceSegmentFlag; } - void setDependentSliceSegmentFlag(bool val) { m_dependentSliceSegmentFlag = val; } -#endif int getSliceQpDelta() const { return m_iSliceQpDelta; } int getSliceChromaQpDelta(ComponentID compID) const { return isLuma(compID) ? 0 : m_iSliceChromaQpDelta[compID]; } bool getUseChromaQpAdj() const { return m_ChromaQpAdjEnabled; } @@ -1909,25 +2222,16 @@ public: bool getIsUsedAsLongTerm(int i, int j) const { return m_bIsUsedAsLongTerm[i][j]; } void setIsUsedAsLongTerm(int i, int j, bool value) { m_bIsUsedAsLongTerm[i][j] = value; } bool getCheckLDC() const { return m_bCheckLDC; } - bool getMvdL1ZeroFlag() const { return m_bLMvdL1Zero; } - int getNumRpsCurrTempList() const; int getList1IdxToList0Idx( int list1Idx ) const { return m_list1IdxToList0Idx[list1Idx]; } -#if !JVET_M0101_HLS - bool isReferenceNalu() const { return ((getNalUnitType() <= NAL_UNIT_RESERVED_VCL_R15) && (getNalUnitType()%2 != 0)) || ((getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP) && (getNalUnitType() <= NAL_UNIT_RESERVED_IRAP_VCL23) ); } -#endif void setPOC( int i ) { m_iPOC = i; } void setNalUnitType( NalUnitType e ) { m_eNalUnitType = e; } NalUnitType getNalUnitType() const { return m_eNalUnitType; } bool getRapPicFlag() const; bool getIdrPicFlag() const { return getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == 
NAL_UNIT_CODED_SLICE_IDR_N_LP; } -#if !JVET_M0101_HLS - bool isIRAP() const { return (getNalUnitType() >= 16) && (getNalUnitType() <= 23); } - bool isIDRorBLA() const { return (getNalUnitType() >= 16) && (getNalUnitType() <= 20); } -#else - bool isIRAP() const { return (getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (getNalUnitType() <= NAL_UNIT_RESERVED_IRAP_VCL13); } - bool isIDRorBLA() const { return (getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP); } -#endif - void checkCRA(const ReferencePictureSet *pReferencePictureSet, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic); + bool isIRAP() const { return (getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (getNalUnitType() <= NAL_UNIT_CODED_SLICE_CRA); } + bool isIDRorBLA() const { return (getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP); } + void checkCRA(const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic); + void checkSTSA(PicList& rcListPic); void decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList& rcListPic, const bool bEfficientFieldIRAPEnabled); void setSliceType( SliceType e ) { m_eSliceType = e; } void setSliceQp( int i ) { m_iSliceQp = i; } @@ -1943,13 +2247,12 @@ public: void setPic( Picture* p ) { m_pcPic = p; } void setDepth( int iDepth ) { m_iDepth = iDepth; } - void setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr = false, bool bCopyL0toL1ErrorCase = false ); + void constructRefPicList(PicList& rcListPic); void setRefPOCList(); void setColFromL0Flag( bool colFromL0 ) { m_colFromL0Flag = colFromL0; } void setColRefIdx( uint32_t refIdx) { m_colRefIdx = refIdx; } void setCheckLDC( bool b ) { m_bCheckLDC = b; } - void setMvdL1ZeroFlag( bool b) { m_bLMvdL1Zero = b; } void setBiDirPred( bool b, int refIdx0, int refIdx1 ) { m_biDirPred = b; 
m_symRefIdx[0] = refIdx0; m_symRefIdx[1] = refIdx1; } bool getBiDirPred() const { return m_biDirPred; } @@ -1959,36 +2262,26 @@ public: bool isInterB() const { return m_eSliceType == B_SLICE; } bool isInterP() const { return m_eSliceType == P_SLICE; } + bool getEnableDRAPSEI () const { return m_enableDRAPSEI; } + void setEnableDRAPSEI ( bool b ) { m_enableDRAPSEI = b; } + bool getUseLTforDRAP () const { return m_useLTforDRAP; } + void setUseLTforDRAP ( bool b ) { m_useLTforDRAP = b; } + bool isDRAP () const { return m_isDRAP; } + void setDRAP ( bool b ) { m_isDRAP = b; } + void setLatestDRAPPOC ( int i ) { m_latestDRAPPOC = i; } + int getLatestDRAPPOC () const { return m_latestDRAPPOC; } + bool cvsHasPreviousDRAP() const { return m_latestDRAPPOC != MAX_INT; } + bool isPocRestrictedByDRAP( int poc, bool precedingDRAPinDecodingOrder ); + bool isPOCInRefPicList( const ReferencePictureList *rpl, int poc ); + void checkConformanceForDRAP( uint32_t temporalId ); + void setLambdas( const double lambdas[MAX_NUM_COMPONENT] ) { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; } const double* getLambdas() const { return m_lambdas; } - void setSplitConsOverrideFlag(bool b) { m_splitConsOverrideFlag = b; } - bool getSplitConsOverrideFlag() const { return m_splitConsOverrideFlag; } - void setMinQTSize(int i) { m_uiMinQTSize = i; } - uint32_t getMinQTSize() const { return m_uiMinQTSize; } - void setMaxBTDepth(int i) { m_uiMaxBTDepth = i; } - uint32_t getMaxBTDepth() const { return m_uiMaxBTDepth; } - void setMaxTTSize(int i) { m_uiMaxTTSize = i; } - uint32_t getMaxTTSize() const { return m_uiMaxTTSize; } - - void setMinQTSizeIChroma(int i) { m_uiMinQTSizeIChroma = i; } - uint32_t getMinQTSizeIChroma() const { return m_uiMinQTSizeIChroma; } - void setMaxBTDepthIChroma(int i) { m_uiMaxBTDepthIChroma = i; } - uint32_t getMaxBTDepthIChroma() const { return m_uiMaxBTDepthIChroma; } - void setMaxBTSizeIChroma(int i) { 
m_uiMaxBTSizeIChroma = i; } - uint32_t getMaxBTSizeIChroma() const { return m_uiMaxBTSizeIChroma; } - void setMaxTTSizeIChroma(int i) { m_uiMaxTTSizeIChroma = i; } - uint32_t getMaxTTSizeIChroma() const { return m_uiMaxTTSizeIChroma; } - void setMaxBTSize(int i) { m_uiMaxBTSize = i; } - uint32_t getMaxBTSize() const { return m_uiMaxBTSize; } - - void setDepQuantEnabledFlag( bool b ) { m_depQuantEnabledFlag = b; } - bool getDepQuantEnabledFlag() const { return m_depQuantEnabledFlag; } -#if HEVC_USE_SIGN_HIDING - void setSignDataHidingEnabledFlag( bool b ) { m_signDataHidingEnabledFlag = b; } - bool getSignDataHidingEnabledFlag() const { return m_signDataHidingEnabledFlag; } -#endif - + void setSliceSubPicId(int i) { m_sliceSubPicId = i; } + uint32_t getSliceSubPicId() const { return m_sliceSubPicId; } + uint32_t getCuQpDeltaSubdiv() const { return this->isIntra() ? m_pcPicHeader->getCuQpDeltaSubdivIntra() : m_pcPicHeader->getCuQpDeltaSubdivInter(); } + uint32_t getCuChromaQpOffsetSubdiv() const { return this->isIntra() ? 
m_pcPicHeader->getCuChromaQpOffsetSubdivIntra() : m_pcPicHeader->getCuChromaQpOffsetSubdivInter(); } void initEqualRef(); bool isEqualRef( RefPicList e, int iRefIdx1, int iRefIdx2 ) { @@ -2016,61 +2309,34 @@ public: void setTLayer( uint32_t uiTLayer ) { m_uiTLayer = uiTLayer; } void checkLeadingPictureRestrictions( PicList& rcListPic ) const; - void applyReferencePictureSet( PicList& rcListPic, const ReferencePictureSet *RPSList) const; + int checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList* pRPL, int rplIdx, bool printErrors, int* refPicIndex) const; + void applyReferencePictureListBasedMarking( PicList& rcListPic, const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, const int layerId ) const; bool isTemporalLayerSwitchingPoint( PicList& rcListPic ) const; bool isStepwiseTemporalLayerSwitchingPointCandidate( PicList& rcListPic ) const; - int checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess = 0, bool bUseRecoveryPoint = false) const; - void createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled - , bool isEncodeLtRef, bool isCompositeRefEnable - ); - void setMaxNumMergeCand(uint32_t val ) { m_maxNumMergeCand = val; } - uint32_t getMaxNumMergeCand() const { return m_maxNumMergeCand; } - void setMaxNumAffineMergeCand( uint32_t val ) { m_maxNumAffineMergeCand = val; } - uint32_t getMaxNumAffineMergeCand() const { return m_maxNumAffineMergeCand; } - void setDisFracMMVD( bool val ) { m_disFracMMVD = val; } - bool getDisFracMMVD() const { return m_disFracMMVD; } - void setNoOutputPriorPicsFlag( bool val ) { m_noOutputPriorPicsFlag = val; } - bool getNoOutputPriorPicsFlag() const { return m_noOutputPriorPicsFlag; } - - void setNoRaslOutputFlag( bool val ) { m_noRaslOutputFlag = val; 
} - bool getNoRaslOutputFlag() const { return m_noRaslOutputFlag; } + int checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors) const; + void setNoIncorrectPicOutputFlag(bool val) { m_noIncorrectPicOutputFlag = val; } + bool getNoIncorrectPicOutputFlag() const { return m_noIncorrectPicOutputFlag; } void setHandleCraAsCvsStartFlag( bool val ) { m_handleCraAsCvsStartFlag = val; } bool getHandleCraAsCvsStartFlag() const { return m_handleCraAsCvsStartFlag; } - void setSliceMode( SliceConstraint mode ) { m_sliceMode = mode; } - SliceConstraint getSliceMode() const { return m_sliceMode; } - void setSliceArgument( uint32_t uiArgument ) { m_sliceArgument = uiArgument; } - uint32_t getSliceArgument() const { return m_sliceArgument; } - void setSliceCurStartCtuTsAddr( uint32_t ctuTsAddr ) { m_sliceCurStartCtuTsAddr = ctuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - uint32_t getSliceCurStartCtuTsAddr() const { return m_sliceCurStartCtuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - void setSliceCurEndCtuTsAddr( uint32_t ctuTsAddr ) { m_sliceCurEndCtuTsAddr = ctuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - uint32_t getSliceCurEndCtuTsAddr() const { return m_sliceCurEndCtuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) + void setNumTilesInSlice( uint32_t u ) { m_sliceMap.setNumTilesInSlice( u ); } + uint32_t getNumTilesInSlice() const { return m_sliceMap.getNumTilesInSlice(); } + void setSliceMap( SliceMap map ) { m_sliceMap = map; } + uint32_t getFirstCtuRsAddrInSlice() const { return m_sliceMap.getCtuAddrInSlice(0); } + void setSliceID( uint32_t u ) { m_sliceMap.setSliceID( u ); } + uint32_t getSliceID() const { return m_sliceMap.getSliceID(); } + uint32_t getNumCtuInSlice() const { return m_sliceMap.getNumCtuInSlice(); } + uint32_t getCtuAddrInSlice( int idx ) const { return m_sliceMap.getCtuAddrInSlice( idx ); } + void initSliceMap() { 
m_sliceMap.initSliceMap(); } + void addCtusToSlice( uint32_t startX, uint32_t stopX, + uint32_t startY, uint32_t stopY, + uint32_t picWidthInCtbsY ) { m_sliceMap.addCtusToSlice(startX, stopX, startY, stopY, picWidthInCtbsY); } void setIndependentSliceIdx( uint32_t i) { m_independentSliceIdx = i; } uint32_t getIndependentSliceIdx() const { return m_independentSliceIdx; } -#if HEVC_DEPENDENT_SLICES - void setSliceSegmentIdx( uint32_t i) { m_sliceSegmentIdx = i; } - uint32_t getSliceSegmentIdx() const { return m_sliceSegmentIdx; } -#endif void copySliceInfo(Slice *pcSliceSrc, bool cpyAlmostAll = true); -#if HEVC_DEPENDENT_SLICES - void setSliceSegmentMode( SliceConstraint mode ) { m_sliceSegmentMode = mode; } - SliceConstraint getSliceSegmentMode() const { return m_sliceSegmentMode; } - void setSliceSegmentArgument( uint32_t uiArgument ) { m_sliceSegmentArgument = uiArgument; } - uint32_t getSliceSegmentArgument() const { return m_sliceSegmentArgument; } -#if HEVC_TILES_WPP - void setSliceSegmentCurStartCtuTsAddr( uint32_t ctuTsAddr ) { m_sliceSegmentCurStartCtuTsAddr = ctuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - uint32_t getSliceSegmentCurStartCtuTsAddr() const { return m_sliceSegmentCurStartCtuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - void setSliceSegmentCurEndCtuTsAddr( uint32_t ctuTsAddr ) { m_sliceSegmentCurEndCtuTsAddr = ctuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) - uint32_t getSliceSegmentCurEndCtuTsAddr() const { return m_sliceSegmentCurEndCtuTsAddr; } // CTU Tile-scan address (as opposed to raster-scan) -#endif -#endif void setSliceBits( uint32_t uiVal ) { m_sliceBits = uiVal; } uint32_t getSliceBits() const { return m_sliceBits; } -#if HEVC_DEPENDENT_SLICES - void setSliceSegmentBits( uint32_t uiVal ) { m_sliceSegmentBits = uiVal; } - uint32_t getSliceSegmentBits() const { return m_sliceSegmentBits; } -#endif void setFinalized( bool uiVal ) { m_bFinalized = uiVal; } bool getFinalized() 
const { return m_bFinalized; } bool testWeightPred( ) const { return m_bTestWeightPred; } @@ -2099,15 +2365,6 @@ public: void setCabacInitFlag( bool val ) { m_cabacInitFlag = val; } //!< set CABAC initial flag bool getCabacInitFlag() const { return m_cabacInitFlag; } //!< get CABAC initial flag -#if !JVET_M0101_HLS - bool getTemporalLayerNonReferenceFlag() const { return m_temporalLayerNonReferenceFlag; } - void setTemporalLayerNonReferenceFlag(bool x) { m_temporalLayerNonReferenceFlag = x; } -#endif - void setLFCrossSliceBoundaryFlag( bool val ) { m_LFCrossSliceBoundaryFlag = val; } - bool getLFCrossSliceBoundaryFlag() const { return m_LFCrossSliceBoundaryFlag; } - - void setEnableTMVPFlag( bool b ) { m_enableTMVPFlag = b; } - bool getEnableTMVPFlag() const { return m_enableTMVPFlag; } void setEncCABACTableIdx( SliceType idx ) { m_encCABACTableIdx = idx; } SliceType getEncCABACTableIdx() const { return m_encCABACTableIdx; } @@ -2126,20 +2383,39 @@ public: void resetProcessingTime() { m_dProcessingTime = m_iProcessingStartTime = 0; } double getProcessingTime() const { return m_dProcessingTime; } - bool getTileGroupAlfEnabledFlag() const { return m_tileGroupAlfEnabledFlag; } - void setTileGroupAlfEnabledFlag(bool b) { m_tileGroupAlfEnabledFlag = b; } + void resetTileGroupAlfEnabledFlag() { memset(m_tileGroupAlfEnabledFlag, 0, sizeof(m_tileGroupAlfEnabledFlag)); } + bool getTileGroupAlfEnabledFlag(ComponentID compId) const { return m_tileGroupAlfEnabledFlag[compId]; } + void setTileGroupAlfEnabledFlag(ComponentID compId, bool b) { m_tileGroupAlfEnabledFlag[compId] = b; } + int getTileGroupNumAps() const { return m_tileGroupNumAps; } + void setTileGroupNumAps(int i) { m_tileGroupNumAps = i; } + int getTileGroupApsIdChroma() const { return m_tileGroupChromaApsId; } + void setTileGroupApsIdChroma(int i) { m_tileGroupChromaApsId = i; } + std::vector<int32_t> getTileGroupApsIdLuma() const { return m_tileGroupLumaApsId; } + void setAlfAPSs(std::vector<int> ApsIDs) + { + 
m_tileGroupLumaApsId.resize(m_tileGroupNumAps); + for (int i = 0; i < m_tileGroupNumAps; i++) + { + m_tileGroupLumaApsId[i] = ApsIDs[i]; + } + } + void setDisableSATDForRD(bool b) { m_disableSATDForRd = b; } + bool getDisableSATDForRD() { return m_disableSATDForRd; } + void scaleRefPicList( Picture *scaledRefPic[ ], PicHeader *picHeader, APS** apss, APS* lmcsAps, APS* scalingListAps, const bool isDecoder ); + void freeScaledRefPicList( Picture *scaledRefPic[] ); + bool checkRPR(); + const std::pair<int, int>& getScalingRatio( const RefPicList refPicList, const int refIdx ) const { CHECK( refIdx < 0, "Invalid reference index" ); return m_scalingRatio[refPicList][refIdx]; } + void setNumEntryPoints( const PPS *pps ); + uint32_t getNumEntryPoints( ) const { return m_numEntryPoints; } - const SliceReshapeInfo& getReshapeInfo() const { return m_sliceReshapeInfo; } - SliceReshapeInfo& getReshapeInfo() { return m_sliceReshapeInfo; } protected: - Picture* xGetRefPic (PicList& rcListPic, int poc); - Picture* xGetLongTermRefPic(PicList& rcListPic, int poc, bool pocHasMsb); + Picture* xGetRefPic( PicList& rcListPic, int poc, const int layerId ); + Picture* xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb, const int layerId ); +public: + std::unordered_map< Position, std::unordered_map< Size, double> > m_mapPltCost; +private: };// END CLASS DEFINITION Slice - - - - void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t> *pOldData, const std::vector<uint8_t> *pNewData); template <class T> class ParameterSetMap @@ -2155,9 +2431,10 @@ public: ParameterSetMap(int maxId) :m_maxId (maxId) - ,m_activePsId(-1) ,m_lastActiveParameterSet(NULL) - {} + { + m_activePsId.clear(); + } ~ParameterSetMap() { @@ -2182,6 +2459,11 @@ public: return m_paramsetMap[psId].parameterSet; } + void clearMap() + { + m_paramsetMap.clear(); + } + void storePS(int psId, T *ps, const std::vector<uint8_t> *pNaluData) { CHECK( psId >= m_maxId, "Invalid PS id" ); @@ -2199,7 
+2481,7 @@ public: return; } - if( m_activePsId == psId ) + if (find(m_activePsId.begin(), m_activePsId.end(), psId) != m_activePsId.end()) { std::swap( m_paramsetMap[psId].parameterSet, m_lastActiveParameterSet ); } @@ -2224,6 +2506,39 @@ public: } } + void checkAuApsContent( APS *aps, std::vector<int>& accessUnitApsNals ) + { + int apsId = ( aps->getAPSId() << NUM_APS_TYPE_LEN ) + (int)aps->getAPSType(); + + if( std::find( accessUnitApsNals.begin(), accessUnitApsNals.end(), apsId ) != accessUnitApsNals.end() ) + { + CHECK( m_paramsetMap.find( apsId ) == m_paramsetMap.end(), "APS does not exist" ); + APS* existedAPS = m_paramsetMap[apsId].parameterSet; + + if( aps->getAPSType() == LMCS_APS ) + { + CHECK( aps->getReshaperAPSInfo() != existedAPS->getReshaperAPSInfo(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" ); + } + else if( aps->getAPSType() == ALF_APS ) + { + CHECK( aps->getAlfAPSParam() != existedAPS->getAlfAPSParam(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" ); + } + else if( aps->getAPSType() == SCALING_LIST_APS ) + { + CHECK( aps->getScalingList() != existedAPS->getScalingList(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" ); + } + else + { + CHECK( true, "Wrong APS type" ); + } + } + else + { + accessUnitApsNals.push_back( apsId ); + } + } + + void setChangedFlag(int psId, bool bChanged=true) { if ( m_paramsetMap.find(psId) != m_paramsetMap.end() ) @@ -2267,32 +2582,32 @@ public: return (m_paramsetMap.begin() == m_paramsetMap.end() ) ? 
NULL : m_paramsetMap.begin()->second.parameterSet; } - void setActive(int psId ) { m_activePsId = psId;} + void setActive(int psId) { m_activePsId.push_back(psId); } + void clear() { m_activePsId.clear(); } private: std::map<int,MapData<T> > m_paramsetMap; int m_maxId; - int m_activePsId; + std::vector<int> m_activePsId; T* m_lastActiveParameterSet; static void setID(T* parameterSet, const int psId); }; - class ParameterSetManager { public: ParameterSetManager(); virtual ~ParameterSetManager(); -#if HEVC_VPS - //! store sequence parameter set and take ownership of it - void storeVPS(VPS *vps, const std::vector<uint8_t> &naluData) { m_vpsMap.storePS( vps->getVPSId(), vps, &naluData); }; + void storeVPS(VPS *vps, const std::vector<uint8_t> &naluData) { m_vpsMap.storePS(vps->getVPSId(), vps, &naluData); } + VPS* getVPS( int vpsId ) { return m_vpsMap.getPS( vpsId ); }; + + void storeDPS(DPS *dps, const std::vector<uint8_t> &naluData) { m_dpsMap.storePS( dps->getDecodingParameterSetId(), dps, &naluData); }; //! get pointer to existing video parameter set - VPS* getVPS(int vpsId) { return m_vpsMap.getPS(vpsId); }; - bool getVPSChangedFlag(int vpsId) const { return m_vpsMap.getChangedFlag(vpsId); } - void clearVPSChangedFlag(int vpsId) { m_vpsMap.clearChangedFlag(vpsId); } - VPS* getFirstVPS() { return m_vpsMap.getFirstPS(); }; -#endif + DPS* getDPS(int dpsId) { return m_dpsMap.getPS(dpsId); }; + bool getDPSChangedFlag(int dpsId) const { return m_dpsMap.getChangedFlag(dpsId); } + void clearDPSChangedFlag(int dpsId) { m_dpsMap.clearChangedFlag(dpsId); } + DPS* getFirstDPS() { return m_dpsMap.getFirstPS(); }; //! store sequence parameter set and take ownership of it void storeSPS(SPS *sps, const std::vector<uint8_t> &naluData) { m_spsMap.storePS( sps->getSPSId(), sps, &naluData); }; @@ -2314,37 +2629,34 @@ public: //! \returns true, if activation is successful // bool activateSPSWithSEI(int SPSId); -#if HEVC_VPS - //! 
activate a PPS and depending on isIDR parameter also SPS and VPS -#else //! activate a PPS and depending on isIDR parameter also SPS -#endif //! \returns true, if activation is successful bool activatePPS(int ppsId, bool isIRAP); - - void storeAPS(APS *aps, const std::vector<uint8_t> &naluData) { m_apsMap.storePS(aps->getAPSId(), aps, &naluData); }; - APS* getAPS(int apsId) { return m_apsMap.getPS(apsId); }; - bool getAPSChangedFlag(int apsId) const { return m_apsMap.getChangedFlag(apsId); } - void clearAPSChangedFlag(int apsId) { m_apsMap.clearChangedFlag(apsId); } - APS* getFirstAPS() { return m_apsMap.getFirstPS(); }; - bool activateAPS(int apsId); -#if HEVC_VPS - const VPS* getActiveVPS()const { return m_vpsMap.getPS(m_activeVPSId); }; -#endif + APS** getAPSs() { return &m_apss[0]; } + ParameterSetMap<APS>* getApsMap() { return &m_apsMap; } + void storeAPS(APS *aps, const std::vector<uint8_t> &naluData) { m_apsMap.storePS(aps->getAPSId() + (MAX_NUM_APS * aps->getAPSType()), aps, &naluData); }; + APS* getAPS(int apsId, int apsType) { return m_apsMap.getPS(apsId + (MAX_NUM_APS * apsType)); }; + bool getAPSChangedFlag(int apsId, int apsType) const { return m_apsMap.getChangedFlag(apsId + (MAX_NUM_APS * apsType)); } + void clearAPSChangedFlag(int apsId, int apsType) { m_apsMap.clearChangedFlag(apsId + ( MAX_NUM_APS * apsType)); } + APS* getFirstAPS() { return m_apsMap.getFirstPS(); }; + bool activateAPS(int apsId, int apsType); const SPS* getActiveSPS()const { return m_spsMap.getPS(m_activeSPSId); }; + const DPS* getActiveDPS()const { return m_dpsMap.getPS(m_activeDPSId); }; + + void checkAuApsContent( APS *aps, std::vector<int>& accessUnitApsNals ) { m_apsMap.checkAuApsContent( aps, accessUnitApsNals ); } protected: -#if HEVC_VPS - ParameterSetMap<VPS> m_vpsMap; -#endif ParameterSetMap<SPS> m_spsMap; ParameterSetMap<PPS> m_ppsMap; ParameterSetMap<APS> m_apsMap; + ParameterSetMap<DPS> m_dpsMap; + ParameterSetMap<VPS> m_vpsMap; -#if HEVC_VPS - int m_activeVPSId; // 
-1 for nothing active -#endif + APS* m_apss[ALF_CTB_MAX_NUM_APS]; + + int m_activeDPSId; // -1 for nothing active int m_activeSPSId; // -1 for nothing active + int m_activeVPSId; // -1 for nothing active }; class PreCalcValues @@ -2357,28 +2669,28 @@ public: , maxCUHeight ( sps.getMaxCUHeight() ) , maxCUWidthMask ( maxCUWidth - 1 ) , maxCUHeightMask ( maxCUHeight - 1 ) - , maxCUWidthLog2 ( g_aucLog2[ maxCUWidth ] ) - , maxCUHeightLog2 ( g_aucLog2[ maxCUHeight ] ) + , maxCUWidthLog2 ( floorLog2( maxCUWidth ) ) + , maxCUHeightLog2 ( floorLog2( maxCUHeight ) ) , minCUWidth ( sps.getMaxCUWidth() >> sps.getMaxCodingDepth() ) , minCUHeight ( sps.getMaxCUHeight() >> sps.getMaxCodingDepth() ) - , minCUWidthLog2 ( g_aucLog2[ minCUWidth ] ) - , minCUHeightLog2 ( g_aucLog2[ minCUHeight ] ) + , minCUWidthLog2 ( floorLog2( minCUWidth ) ) + , minCUHeightLog2 ( floorLog2( minCUHeight ) ) , partsInCtuWidth ( 1 << sps.getMaxCodingDepth() ) , partsInCtuHeight ( 1 << sps.getMaxCodingDepth() ) , partsInCtu ( 1 << (sps.getMaxCodingDepth() << 1) ) - , widthInCtus ( (sps.getPicWidthInLumaSamples () + sps.getMaxCUWidth () - 1) / sps.getMaxCUWidth () ) - , heightInCtus ( (sps.getPicHeightInLumaSamples() + sps.getMaxCUHeight() - 1) / sps.getMaxCUHeight() ) + , widthInCtus ( (pps.getPicWidthInLumaSamples () + sps.getMaxCUWidth () - 1) / sps.getMaxCUWidth () ) + , heightInCtus ( (pps.getPicHeightInLumaSamples() + sps.getMaxCUHeight() - 1) / sps.getMaxCUHeight() ) , sizeInCtus ( widthInCtus * heightInCtus ) - , lumaWidth ( sps.getPicWidthInLumaSamples() ) - , lumaHeight ( sps.getPicHeightInLumaSamples() ) + , lumaWidth ( pps.getPicWidthInLumaSamples() ) + , lumaHeight ( pps.getPicHeightInLumaSamples() ) , fastDeltaQPCuMaxSize( Clip3(sps.getMaxCUHeight() >> (sps.getLog2DiffMaxMinCodingBlockSize()), sps.getMaxCUHeight(), 32u) ) , noChroma2x2 ( false ) , isEncoder ( _isEncoder ) , ISingleTree ( !sps.getUseDualITree() ) - , maxBtDepth { sps.getMaxBTDepthI(), sps.getMaxBTDepth(), 
sps.getMaxBTDepthIChroma() } - , minBtSize { MIN_BT_SIZE, MIN_BT_SIZE_INTER, MIN_BT_SIZE_C } + , maxBtDepth { sps.getMaxMTTHierarchyDepthI(), sps.getMaxMTTHierarchyDepth(), sps.getMaxMTTHierarchyDepthIChroma() } + , minBtSize { 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize() } , maxBtSize { sps.getMaxBTSizeI(), sps.getMaxBTSize(), sps.getMaxBTSizeIChroma() } - , minTtSize { MIN_TT_SIZE, MIN_TT_SIZE_INTER, MIN_TT_SIZE_C } + , minTtSize { 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize() } , maxTtSize { sps.getMaxTTSizeI(), sps.getMaxTTSize(), sps.getMaxTTSizeIChroma() } , minQtSize { sps.getMinQTSize(I_SLICE, CHANNEL_TYPE_LUMA), sps.getMinQTSize(B_SLICE, CHANNEL_TYPE_LUMA), sps.getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA) } {} @@ -2429,12 +2741,12 @@ public: }; #if ENABLE_TRACING -#if HEVC_VPS void xTraceVPSHeader(); -#endif +void xTraceDPSHeader(); void xTraceSPSHeader(); void xTracePPSHeader(); void xTraceAPSHeader(); +void xTracePictureHeader(); void xTraceSliceHeader(); void xTraceAccessUnitDelimiter(); #endif diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index 8c701503e3009991f614e59fe2978e5dec32badd..c4221f13644fe2ea8a3c9d4c2c2e37897cb0bd1e 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -83,6 +83,78 @@ InvTrans *fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] = //! \ingroup CommonLib //! 
\{ +static inline int64_t square( const int d ) { return d * (int64_t)d; } + +template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf &resCb, const PelBuf &resCr, PelBuf& resC1, PelBuf& resC2 ) +{ + const Pel* cb = resCb.buf; + const Pel* cr = resCr.buf; + Pel* c1 = resC1.buf; + Pel* c2 = resC2.buf; + int64_t d1 = 0; + int64_t d2 = 0; + for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride ) + { + for( SizeType x = 0; x < resCb.width; x++ ) + { + int cbx = cb[x], crx = cr[x]; + if ( signedMode == 1 ) + { + c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 ); + d1 += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) ); + } + else if ( signedMode == -1 ) + { + c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 ); + d1 += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) ); + } + else if ( signedMode == 2 ) + { + c1[x] = Pel( ( cbx + crx ) / 2 ); + d1 += square( cbx - c1[x] ) + square( crx - c1[x] ); + } + else if ( signedMode == -2 ) + { + c1[x] = Pel( ( cbx - crx ) / 2 ); + d1 += square( cbx - c1[x] ) + square( crx + c1[x] ); + } + else if ( signedMode == 3 ) + { + c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 ); + d1 += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] ); + } + else if ( signedMode == -3 ) + { + c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 ); + d1 += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] ); + } + else + { + d1 += square( cbx ); + d2 += square( crx ); + } + } + } + return std::make_pair(d1,d2); +} + +template<int signedMode> void invTransformCbCr( PelBuf &resCb, PelBuf &resCr ) +{ + Pel* cb = resCb.buf; + Pel* cr = resCr.buf; + for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride ) + { + for( SizeType x = 0; x < resCb.width; x++ ) + { + if ( signedMode == 1 ) { cr[x] = cb[x] >> 1; } + else if ( signedMode == -1 ) { cr[x] = -cb[x] >> 1; } + else if ( signedMode == 2 ) { cr[x] = cb[x]; } + else if ( signedMode == -2 ) { cr[x] = (cb[x] == -32768 && sizeof(Pel) == 2) ? 
32767 : -cb[x]; } // non-normative clipping to prevent 16-bit overflow + else if ( signedMode == 3 ) { cb[x] = cr[x] >> 1; } + else if ( signedMode == -3 ) { cb[x] = -cr[x] >> 1; } + } + } +} // ==================================================================================================================== // TrQuant class member functions @@ -90,11 +162,23 @@ InvTrans *fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] = TrQuant::TrQuant() : m_quant( nullptr ) { // allocate temporary buffers - m_plTempCoeff = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE ); - m_mtsCoeffs = new TCoeff*[ NUM_TRAFO_MODES_MTS ]; - for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ ) { - m_mtsCoeffs[i] = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE ); + m_invICT = m_invICTMem + maxAbsIctMode; + m_invICT[ 0] = invTransformCbCr< 0>; + m_invICT[ 1] = invTransformCbCr< 1>; + m_invICT[-1] = invTransformCbCr<-1>; + m_invICT[ 2] = invTransformCbCr< 2>; + m_invICT[-2] = invTransformCbCr<-2>; + m_invICT[ 3] = invTransformCbCr< 3>; + m_invICT[-3] = invTransformCbCr<-3>; + m_fwdICT = m_fwdICTMem + maxAbsIctMode; + m_fwdICT[ 0] = fwdTransformCbCr< 0>; + m_fwdICT[ 1] = fwdTransformCbCr< 1>; + m_fwdICT[-1] = fwdTransformCbCr<-1>; + m_fwdICT[ 2] = fwdTransformCbCr< 2>; + m_fwdICT[-2] = fwdTransformCbCr<-2>; + m_fwdICT[ 3] = fwdTransformCbCr< 3>; + m_fwdICT[-3] = fwdTransformCbCr<-3>; } } @@ -105,23 +189,6 @@ TrQuant::~TrQuant() delete m_quant; m_quant = nullptr; } - - // delete temporary buffers - if ( m_plTempCoeff ) - { - xFree( m_plTempCoeff ); - m_plTempCoeff = nullptr; - } - if( m_mtsCoeffs ) - { - for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ ) - { - xFree( m_mtsCoeffs[i] ); - m_mtsCoeffs[i] = nullptr; - } - delete[] m_mtsCoeffs; - m_mtsCoeffs = nullptr; - } } #if ENABLE_SPLIT_PARALLELISM @@ -146,23 +213,15 @@ void TrQuant::init( const Quant* otherQuant, #if T0196_SELECTIVE_RDOQ const bool useSelectiveRDOQ, #endif - const bool bEnc, - const bool useTransformSkipFast + const 
bool bEnc ) { - m_uiMaxTrSize = uiMaxTrSize; - m_bEnc = bEnc; - m_useTransformSkipFast = useTransformSkipFast; - delete m_quant; m_quant = nullptr; - if( bUseRDOQ || !bEnc ) { m_quant = new DepQuant( otherQuant, bEnc ); } - else - m_quant = new Quant( otherQuant ); if( m_quant ) { @@ -170,41 +229,288 @@ void TrQuant::init( const Quant* otherQuant, } } +void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) +{ + const int8_t* trMat = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; + const int trSize = ( size > 4 ) ? 48 : 16; + int coef; + int* out = dst; + assert( index < 3 ); -void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP ) + for( int j = 0; j < zeroOutSize; j++ ) + { + int* srcPtr = src; + const int8_t* trMatTmp = trMat; + coef = 0; + for( int i = 0; i < trSize; i++ ) + { + coef += *srcPtr++ * *trMatTmp++; + } + *out++ = ( coef + 64 ) >> 7; + trMat += trSize; + } + + ::memset( out, 0, ( trSize - zeroOutSize ) * sizeof( int ) ); +} + +void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) { - const CompArea &area = tu.blocks[compID]; - const uint32_t uiWidth = area.width; - const uint32_t uiHeight = area.height; + int maxLog2TrDynamicRange = 15; + const TCoeff outputMinimum = -( 1 << maxLog2TrDynamicRange ); + const TCoeff outputMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; + const int8_t* trMat = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; + const int trSize = ( size > 4 ) ? 48 : 16; + int resi; + int* out = dst; -#if MAX_TB_SIZE_SIGNALLING - CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" 
); -#else - CHECK( uiWidth > MAX_TB_SIZEY || uiHeight > MAX_TB_SIZEY, "Maximal allowed transformation size exceeded!" ); -#endif - if (tu.cu->transQuantBypass) - { - // where should this logic go? - const bool rotateResidual = TU::isNonTransformedResidualRotated(tu, compID); - const CCoeffBuf pCoeff = tu.getCoeffs(compID); + assert( index < 3 ); - for (uint32_t y = 0, coefficientIndex = 0; y < uiHeight; y++) + for( int j = 0; j < trSize; j++ ) + { + resi = 0; + const int8_t* trMatTmp = trMat; + int* srcPtr = src; + for( int i = 0; i < zeroOutSize; i++ ) { - for (uint32_t x = 0; x < uiWidth; x++, coefficientIndex++) - { - pResi.at(x, y) = rotateResidual ? pCoeff.at(pCoeff.width - x - 1, pCoeff.height - y - 1) : pCoeff.at(x, y); - } + resi += *srcPtr++ * *trMatTmp; + trMatTmp += trSize; } + *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 ); + trMat++; + } +} + +uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode ) +{ + uint32_t intraMode; + + if( wideAngPredMode < 0 ) + { + intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE ); + } + else if( wideAngPredMode >= NUM_LUMA_MODE ) + { + intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) ); } else { - CoeffBuf tempCoeff = CoeffBuf( m_plTempCoeff, area ); + intraMode = ( uint32_t ) wideAngPredMode; + } + + return intraMode; +} + +bool TrQuant::getTransposeFlag( uint32_t intraMode ) +{ + return ( ( intraMode >= NUM_LUMA_MODE ) && ( intraMode >= ( NUM_LUMA_MODE + ( NUM_EXT_LUMA_MODE >> 1 ) ) ) ) || + ( ( intraMode < NUM_LUMA_MODE ) && ( intraMode > DIA_IDX ) ); +} + +void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) +{ + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + + if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4) + { + const bool whge3 = width >= 8 && height >= 8; 
+ const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; + uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); + + if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) + { + intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) ); + } + if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = PLANAR_IDX; + } + CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); + + if( lfnstIdx < 3 ) + { + intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) ); +#if RExt__DECODER_DEBUG_TOOL_STATISTICS + CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType { STATS__TOOL_LFNST, width, height, compID } ); +#endif + bool transposeFlag = getTransposeFlag( intraMode ); + const int sbSize = whge3 ? 8 : 4; + bool tu4x4Flag = ( width == 4 && height == 4 ); + bool tu8x8Flag = ( width == 8 && height == 8 ); + TCoeff* lfnstTemp; + TCoeff* coeffTemp; + int y; + lfnstTemp = m_tempInMatrix; // inverse spectral rearrangement + coeffTemp = m_tempCoeff; + TCoeff * dst = lfnstTemp; + const ScanElement * scanPtr = scan; + for( y = 0; y < 16; y++ ) + { + *dst++ = coeffTemp[ scanPtr->idx ]; + scanPtr++; + } + + invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 
8 : 16 ); + + lfnstTemp = m_tempOutMatrix; // inverse spectral rearrangement + + if( transposeFlag ) + { + if( sbSize == 4 ) + { + for( y = 0; y < 4; y++ ) + { + coeffTemp[ 0 ] = lfnstTemp[ 0 ]; coeffTemp[ 1 ] = lfnstTemp[ 4 ]; + coeffTemp[ 2 ] = lfnstTemp[ 8 ]; coeffTemp[ 3 ] = lfnstTemp[ 12 ]; + lfnstTemp++; + coeffTemp += width; + } + } + else // ( sbSize == 8 ) + { + for( y = 0; y < 8; y++ ) + { + coeffTemp[ 0 ] = lfnstTemp[ 0 ]; coeffTemp[ 1 ] = lfnstTemp[ 8 ]; + coeffTemp[ 2 ] = lfnstTemp[ 16 ]; coeffTemp[ 3 ] = lfnstTemp[ 24 ]; + if( y < 4 ) + { + coeffTemp[ 4 ] = lfnstTemp[ 32 ]; coeffTemp[ 5 ] = lfnstTemp[ 36 ]; + coeffTemp[ 6 ] = lfnstTemp[ 40 ]; coeffTemp[ 7 ] = lfnstTemp[ 44 ]; + } + lfnstTemp++; + coeffTemp += width; + } + } + } + else + { + for( y = 0; y < sbSize; y++ ) + { + uint32_t uiStride = ( y < 4 ) ? sbSize : 4; + ::memcpy( coeffTemp, lfnstTemp, uiStride * sizeof( TCoeff ) ); + lfnstTemp += uiStride; + coeffTemp += width; + } + } + } + } +} + +void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr ) +{ + const CompArea& area = tu.blocks[ compID ]; + const uint32_t width = area.width; + const uint32_t height = area.height; + const uint32_t lfnstIdx = tu.cu->lfnstIdx; + + if( lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4) + { + const bool whge3 = width >= 8 && height >= 8; + const ScanElement * scan = whge3 ? 
g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ]; + uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ); + + if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) + { + intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) ); + } + if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = PLANAR_IDX; + } + CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" ); + + if( lfnstIdx < 3 ) + { + intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) ); + + bool transposeFlag = getTransposeFlag( intraMode ); + const int sbSize = whge3 ? 8 : 4; + bool tu4x4Flag = ( width == 4 && height == 4 ); + bool tu8x8Flag = ( width == 8 && height == 8 ); + TCoeff* lfnstTemp; + TCoeff* coeffTemp; + TCoeff * tempCoeff = loadTr ? 
m_mtsCoeffs[tu.mtsIdx[compID]] : m_tempCoeff; + + int y; + lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform + coeffTemp = tempCoeff; + + if( transposeFlag ) + { + if( sbSize == 4 ) + { + for( y = 0; y < 4; y++ ) + { + lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 4 ] = coeffTemp[ 1 ]; + lfnstTemp[ 8 ] = coeffTemp[ 2 ]; lfnstTemp[ 12 ] = coeffTemp[ 3 ]; + lfnstTemp++; + coeffTemp += width; + } + } + else // ( sbSize == 8 ) + { + for( y = 0; y < 8; y++ ) + { + lfnstTemp[ 0 ] = coeffTemp[ 0 ]; lfnstTemp[ 8 ] = coeffTemp[ 1 ]; + lfnstTemp[ 16 ] = coeffTemp[ 2 ]; lfnstTemp[ 24 ] = coeffTemp[ 3 ]; + if( y < 4 ) + { + lfnstTemp[ 32 ] = coeffTemp[ 4 ]; lfnstTemp[ 36 ] = coeffTemp[ 5 ]; + lfnstTemp[ 40 ] = coeffTemp[ 6 ]; lfnstTemp[ 44 ] = coeffTemp[ 7 ]; + } + lfnstTemp++; + coeffTemp += width; + } + } + } + else + { + for( y = 0; y < sbSize; y++ ) + { + uint32_t uiStride = ( y < 4 ) ? sbSize : 4; + ::memcpy( lfnstTemp, coeffTemp, uiStride * sizeof( TCoeff ) ); + lfnstTemp += uiStride; + coeffTemp += width; + } + } + + fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 ); + + lfnstTemp = m_tempOutMatrix; // forward spectral rearrangement + coeffTemp = tempCoeff; + const ScanElement * scanPtr = scan; + int lfnstCoeffNum = ( sbSize == 4 ) ? sbSize * sbSize : 48; + for( y = 0; y < lfnstCoeffNum; y++ ) + { + coeffTemp[ scanPtr->idx ] = *lfnstTemp++; + scanPtr++; + } + } + } +} + + +void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP ) +{ + const CompArea &area = tu.blocks[compID]; + const uint32_t uiWidth = area.width; + const uint32_t uiHeight = area.height; + + CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" 
); + CoeffBuf tempCoeff = CoeffBuf(m_tempCoeff, area); xDeQuant( tu, tempCoeff, compID, cQP ); DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); - if( isLuma(compID) && tu.mtsIdx == 1 ) + if( tu.cs->sps->getUseLFNST() ) + { + xInvLfnst( tu, compID ); + } + + if( tu.mtsIdx[compID] == MTS_SKIP ) { xITransformSkip( tempCoeff, pResi, tu, compID ); } @@ -212,7 +518,6 @@ void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, Pel { xIT( tu, compID, tempCoeff, pResi ); } - } //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID); DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID); @@ -223,7 +528,7 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf & { const CompArea &area = tu.blocks[compID]; - if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx==1 || tu.cu->transQuantBypass)) + if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx[compID] == MTS_SKIP)) { const uint32_t uiWidth = area.width; const uint32_t uiHeight = area.height; @@ -277,25 +582,117 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf & } } + +std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit &tu, const PelBuf &resCb, const PelBuf &resCr, PelBuf &resC1, PelBuf &resC2, int jointCbCr ) +{ + CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" ); + CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" ); + CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" ); + return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 ); +} + +void TrQuant::invTransformICT( const TransformUnit &tu, PelBuf &resCb, PelBuf &resCr ) +{ + CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" ); + (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr ); +} + +std::vector<int> TrQuant::selectICTCandidates( const TransformUnit &tu, CompStorage* resCb, CompStorage* resCr ) +{ + CHECK( 
!resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" ); + + if( !CU::isIntra( *tu.cu ) ) + { + int cbfMask = 3; + resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] ); + resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] ); + fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask ); + std::vector<int> cbfMasksToTest; + cbfMasksToTest.push_back( cbfMask ); + return cbfMasksToTest; + } + + std::pair<int64_t,int64_t> pairDist[4]; + for( int cbfMask = 0; cbfMask < 4; cbfMask++ ) + { + if( cbfMask ) + { + CHECK( resCb[cbfMask].valid() || resCr[cbfMask].valid(), "target components for cbfMask=" << cbfMask << " are already present" ); + resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] ); + resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] ); + } + pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask ); + } + + std::vector<int> cbfMasksToTest; + int64_t minDist1 = std::min<int64_t>( pairDist[0].first, pairDist[0].second ); + int64_t minDist2 = std::numeric_limits<int64_t>::max(); + int cbfMask1 = 0; + int cbfMask2 = 0; + for( int cbfMask : { 1, 2, 3 } ) + { + if( pairDist[cbfMask].first < minDist1 ) + { + cbfMask2 = cbfMask1; minDist2 = minDist1; + cbfMask1 = cbfMask; minDist1 = pairDist[cbfMask1].first; + } + else if( pairDist[cbfMask].first < minDist2 ) + { + cbfMask2 = cbfMask; minDist2 = pairDist[cbfMask2].first; + } + } + if( cbfMask1 ) + { + cbfMasksToTest.push_back( cbfMask1 ); + } + if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) ) + { + cbfMasksToTest.push_back( cbfMask2 ); + } + + return cbfMasksToTest; +} + + + // ------------------------------------------------------------------------------------------------ // Logical transform // ------------------------------------------------------------------------------------------------ -void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer ) 
+void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer) { - bool mtsActivated = CU::isIntra( *tu.cu ) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter( *tu.cu ); - - bool mtsImplicit = CU::isIntra( *tu.cu ) && tu.cs->sps->getUseImplicitMTS() && compID == COMPONENT_Y; + const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter(*tu.cu)) && isLuma(compID); + const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0; + const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID); + const bool isSBT = CU::isInter(*tu.cu) && tu.cu->sbtInfo && isLuma(compID); trTypeHor = DCT2; trTypeVer = DCT2; - if (tu.cu->ispMode && isLuma(compID)) + if (isISP && tu.cu->lfnstIdx) { - TU::getTransformTypeISP(tu, compID, trTypeHor, trTypeVer); return; -} - if( tu.cu->sbtInfo && compID == COMPONENT_Y ) + } + + if (!tu.cs->sps->getUseMTS()) + return; + + if (isImplicitMTS || isISP) + { + int width = tu.blocks[compID].width; + int height = tu.blocks[compID].height; + bool widthDstOk = width >= 4 && width <= 16; + bool heightDstOk = height >= 4 && height <= 16; + + if (widthDstOk) + trTypeHor = DST7; + if (heightDstOk) + trTypeVer = DST7; + return; + } + + + if (isSBT) { uint8_t sbtIdx = tu.cu->getSbtIdx(); uint8_t sbtPos = tu.cu->getSbtPos(); @@ -329,43 +726,27 @@ void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTy return; } - if ( mtsActivated ) + if (isExplicitMTS) { - if( compID == COMPONENT_Y ) + if (tu.mtsIdx[compID] > MTS_SKIP) { - if ( tu.mtsIdx > 1 ) - { - int indHor = ( tu.mtsIdx - 2 ) & 1; - int indVer = ( tu.mtsIdx - 2 ) >> 1; - - trTypeHor = indHor ? DCT8 : DST7; - trTypeVer = indVer ? 
DCT8 : DST7; - } + int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1; + int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1; + trTypeHor = indHor ? DCT8 : DST7; + trTypeVer = indVer ? DCT8 : DST7; } } - else if ( mtsImplicit ) - { - int width = tu.blocks[compID].width; - int height = tu.blocks[compID].height; - bool widthDstOk = width >= 4 && width <= 16; - bool heightDstOk = height >= 4 && height <= 16; - - if ( width < height && widthDstOk ) - trTypeHor = DST7; - else if ( height < width && heightDstOk ) - trTypeVer = DST7; - else if ( width == height && widthDstOk ) - trTypeHor = trTypeVer = DST7; - } } + + void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, CoeffBuf &dstCoeff, const int width, const int height ) { const unsigned maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange( toChannelType( compID ) ); const unsigned bitDepth = tu.cs->sps->getBitDepth( toChannelType( compID ) ); const int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD]; - const uint32_t transformWidthIndex = g_aucLog2[width ] - 1; // nLog2WidthMinus1, since transform start from 2-point - const uint32_t transformHeightIndex = g_aucLog2[height] - 1; // nLog2HeightMinus1, since transform start from 2-point + const uint32_t transformWidthIndex = floorLog2(width ) - 1; // nLog2WidthMinus1, since transform start from 2-point + const uint32_t transformHeightIndex = floorLog2(height) - 1; // nLog2HeightMinus1, since transform start from 2-point int trTypeHor = DCT2; @@ -373,8 +754,21 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel getTrTypes ( tu, compID, trTypeHor, trTypeVer ); - const int skipWidth = ( trTypeHor != DCT2 && width == 32 ) ? 16 : width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; - const int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? 
height - JVET_C0024_ZERO_OUT_TH : 0; + int skipWidth = ( trTypeHor != DCT2 && width == 32 ) ? 16 : width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; + int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0; + if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx ) + { + if( (width == 4 && height > 4) || (width > 4 && height == 4) ) + { + skipWidth = width - 4; + skipHeight = height - 4; + } + else if( (width >= 8 && height >= 8) ) + { + skipWidth = width - 8; + skipHeight = height - 8; + } + } #if RExt__DECODER_DEBUG_TOOL_STATISTICS if ( trTypeHor != DCT2 ) @@ -398,8 +792,8 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel if( width > 1 && height > 1 ) // 2-D transform { - const int shift_1st = ((g_aucLog2[width ]) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; - const int shift_2nd = (g_aucLog2[height]) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC; + const int shift_1st = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; + const int shift_2nd = (floorLog2(height)) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC; CHECK( shift_1st < 0, "Negative shift" ); CHECK( shift_2nd < 0, "Negative shift" ); TCoeff *tmp = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) ); @@ -409,14 +803,14 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel } else if( height == 1 ) //1-D horizontal transform { - const int shift = ((g_aucLog2[width ]) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; + const int shift = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; CHECK( shift < 0, "Negative shift" ); CHECKD( ( transformWidthIndex < 0 ), "There is a problem with the width." 
); fastFwdTrans[trTypeHor][transformWidthIndex]( block, dstCoeff.buf, shift, 1, 0, skipWidth ); } else //if (iWidth == 1) //1-D vertical transform { - int shift = ( ( g_aucLog2[height] ) + bitDepth + TRANSFORM_MATRIX_SHIFT ) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; + int shift = ( ( floorLog2(height) ) + bitDepth + TRANSFORM_MATRIX_SHIFT ) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; CHECK( shift < 0, "Negative shift" ); CHECKD( ( transformHeightIndex < 0 ), "There is a problem with the height." ); fastFwdTrans[trTypeVer][transformHeightIndex]( block, dstCoeff.buf, shift, 1, 0, skipHeight ); @@ -432,17 +826,29 @@ void TrQuant::xIT( const TransformUnit &tu, const ComponentID &compID, const CCo const int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE]; const TCoeff clipMinimum = -( 1 << maxLog2TrDynamicRange ); const TCoeff clipMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; - const uint32_t transformWidthIndex = g_aucLog2[width ] - 1; // nLog2WidthMinus1, since transform start from 2-point - const uint32_t transformHeightIndex = g_aucLog2[height] - 1; // nLog2HeightMinus1, since transform start from 2-point + const uint32_t transformWidthIndex = floorLog2(width ) - 1; // nLog2WidthMinus1, since transform start from 2-point + const uint32_t transformHeightIndex = floorLog2(height) - 1; // nLog2HeightMinus1, since transform start from 2-point int trTypeHor = DCT2; int trTypeVer = DCT2; getTrTypes ( tu, compID, trTypeHor, trTypeVer ); - - const int skipWidth = ( trTypeHor != DCT2 && width == 32 ) ? 16 : width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; - const int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0; + int skipWidth = ( trTypeHor != DCT2 && width == 32 ) ? 16 : width > JVET_C0024_ZERO_OUT_TH ? width - JVET_C0024_ZERO_OUT_TH : 0; + int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? 
height - JVET_C0024_ZERO_OUT_TH : 0; + if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx ) + { + if( (width == 4 && height > 4) || (width > 4 && height == 4) ) + { + skipWidth = width - 4; + skipHeight = height - 4; + } + else if( (width >= 8 && height >= 8) ) + { + skipWidth = width - 8; + skipHeight = height - 8; + } + } TCoeff *block = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) ); @@ -493,42 +899,13 @@ void TrQuant::xITransformSkip(const CCoeffBuf &pCoeff, const CompArea &area = tu.blocks[compID]; const int width = area.width; const int height = area.height; - const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID)); - const int channelBitDepth = tu.cs->sps->getBitDepth(toChannelType(compID)); - - int iTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange); - if( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ) - { - iTransformShift = std::max<int>( 0, iTransformShift ); - } - - int iWHScale = 1; - const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID ); - - if( iTransformShift >= 0 ) + for (uint32_t y = 0; y < height; y++) { - const TCoeff offset = iTransformShift == 0 ? 0 : ( 1 << ( iTransformShift - 1 ) ); - - for( uint32_t y = 0; y < height; y++ ) - { - for( uint32_t x = 0; x < width; x++ ) - { - pResidual.at( x, y ) = Pel( ( ( rotateResidual ? pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) ) * iWHScale + offset ) >> iTransformShift ); - } - } - } - else //for very high bit depths - { - iTransformShift = -iTransformShift; - - for( uint32_t y = 0; y < height; y++ ) - { - for( uint32_t x = 0; x < width; x++ ) + for (uint32_t x = 0; x < width; x++) { - pResidual.at( x, y ) = Pel( ( rotateResidual ? 
pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) ) * iWHScale << iTransformShift ); + pResidual.at(x, y) = Pel(pCoeff.at(x, y)); } - } } } @@ -537,7 +914,7 @@ void TrQuant::xQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffB m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); } -void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand, double* diagRatio, double* horVerRatio ) +void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand ) { CodingStructure &cs = *tu.cs; const CompArea &rect = tu.blocks[compID]; @@ -546,11 +923,7 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const const CPelBuf resiBuf = cs.getResiBuf(rect); -#if MAX_TB_SIZE_SIGNALLING CHECK( cs.sps->getMaxTbSize() < width, "Unsupported transformation size" ); -#else - CHECK( MAX_TB_SIZEY < width, "Unsupported transformation size" ); -#endif int pos = 0; std::vector<TrCost> trCosts; @@ -558,8 +931,8 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const const double facBB[] = { 1.2, 1.3, 1.3, 1.4, 1.5 }; while( it != trModes->end() ) { - tu.mtsIdx = it->first; - CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx], rect ); + tu.mtsIdx[compID] = it->first; + CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx[compID]], rect); if( tu.noResidual ) { int sumAbs = 0; @@ -568,7 +941,7 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const continue; } - if( isLuma(compID) && tu.mtsIdx == 1 ) + if ( tu.mtsIdx[compID] == MTS_SKIP ) { xTransformSkip( tu, compID, resiBuf, tempCoeff.buf ); } @@ -584,20 +957,23 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const } double scaleSAD=1.0; - if (isLuma(compID) && tu.mtsIdx==1 && ((g_aucLog2[width] + g_aucLog2[height]) & 1) == 1 ) + if ( tu.mtsIdx[compID] == 
MTS_SKIP && ((floorLog2(width) + floorLog2(height)) & 1) == 1) { scaleSAD=1.0/1.414213562; // compensate for not scaling transform skip coefficients by 1/sqrt(2) } + if (tu.mtsIdx[compID] == MTS_SKIP) + { + int trShift = getTransformShift(tu.cu->slice->getSPS()->getBitDepth(toChannelType(compID)), rect.size(), tu.cu->slice->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID))); + scaleSAD *= pow(2, trShift); + } + trCosts.push_back( TrCost( int(sumAbs*scaleSAD), pos++ ) ); it++; } - // it gets the distribution of the DCT-II coefficients energy, which will be useful to discard ISP tests - CoeffBuf coeffsDCT( m_mtsCoeffs[0], rect ); - xGetCoeffEnergy( tu, compID, coeffsDCT, diagRatio, horVerRatio ); int numTests = 0; std::vector<TrCost>::iterator itC = trCosts.begin(); - const double fac = facBB[g_aucLog2[std::max(width, height)]-2]; + const double fac = facBB[std::max(0, floorLog2(std::max(width, height)) - 2)]; const double thr = fac * trCosts.begin()->first; const double thrTS = trCosts.begin()->first; while( itC != trCosts.end() ) @@ -609,15 +985,15 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const } } -void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr, double* diagRatio, double* horVerRatio ) +void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr ) { CodingStructure &cs = *tu.cs; + const SPS &sps = *cs.sps; const CompArea &rect = tu.blocks[compID]; const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; const CPelBuf resiBuf = cs.getResiBuf(rect); - CoeffBuf rpcCoeff = tu.getCoeffs(compID); if( tu.noResidual ) { @@ -629,50 +1005,26 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const RDPCMMode rdpcmMode = RDPCM_OFF; rdpcmNxN(tu, compID, cQP, uiAbsSum, rdpcmMode); + if ((tu.cu->bdpcmMode && 
isLuma(compID)) || (!isLuma(compID) && tu.cu->bdpcmModeChroma)) + { + tu.mtsIdx[compID] = MTS_SKIP; + } + if (rdpcmMode == RDPCM_OFF) { uiAbsSum = 0; // transform and quantize - if (CU::isLosslessCoded(*tu.cu)) - { - const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID ); - - for( uint32_t y = 0; y < uiHeight; y++ ) - { - for( uint32_t x = 0; x < uiWidth; x++ ) - { - const Pel currentSample = resiBuf.at( x, y ); - - if( rotateResidual ) - { - rpcCoeff.at( uiWidth - x - 1, uiHeight - y - 1 ) = currentSample; - } - else - { - rpcCoeff.at( x, y ) = currentSample; - } - - uiAbsSum += TCoeff( abs( currentSample ) ); - } - } - } - else - { -#if MAX_TB_SIZE_SIGNALLING CHECK( cs.sps->getMaxTbSize() < uiWidth, "Unsupported transformation size" ); -#else - CHECK( MAX_TB_SIZEY < uiWidth, "Unsupported transformation size" ); -#endif - CoeffBuf tempCoeff( loadTr ? m_mtsCoeffs[tu.mtsIdx] : m_plTempCoeff, rect ); + CoeffBuf tempCoeff(loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_tempCoeff, rect); DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID ); if( !loadTr ) { - if( isLuma(compID) && tu.mtsIdx == 1 ) + if ( tu.mtsIdx[compID] == MTS_SKIP ) { xTransformSkip( tu, compID, resiBuf, tempCoeff.buf ); } @@ -682,64 +1034,26 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const } } - //we do this only with the DCT-II coefficients - if( isLuma(compID) && - !loadTr && tu.mtsIdx == 0 - ) + + if( sps.getUseLFNST() ) { - //it gets the distribution of the coefficients energy, which will be useful to discard ISP tests - xGetCoeffEnergy( tu, compID, tempCoeff, diagRatio, horVerRatio ); + xFwdLfnst( tu, compID, loadTr ); } + DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID ); xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx ); DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID ); - } } // set coded block flag (CBF) TU::setCbfAtDepth (tu, compID, tu.depth, uiAbsSum > 0); } -void 
TrQuant::xGetCoeffEnergy( TransformUnit &tu, const ComponentID &compID, const CoeffBuf& coeffs, double* diagRatio, double* horVerRatio ) -{ - if( nullptr == diagRatio || nullptr == horVerRatio ) return; - - if( tu.cu->predMode == MODE_INTRA && !tu.cu->ispMode && isLuma( compID ) && CU::canUseISPSplit( *tu.cu, compID ) != NOT_INTRA_SUBPARTITIONS ) - { - const int width = tu.cu->blocks[compID].width; - const int height = tu.cu->blocks[compID].height; - const int log2Sl = width <= height ? g_aucLog2[height >> g_aucLog2[width]] : g_aucLog2[width >> g_aucLog2[height]]; - const int diPos1 = width <= height ? width : height; - const int diPos2 = width <= height ? height : width; - const int ofsPos1 = width <= height ? 1 : coeffs.stride; - const int ofsPos2 = width <= height ? coeffs.stride : 1; - - int wdtE = 0, hgtE = 0, diaE = 0; - int* gtE = width <= height ? &wdtE : &hgtE; - int* stE = width <= height ? &hgtE : &wdtE; - - for( int pos1 = 0; pos1 < diPos1; pos1++ ) - { - const int posN = pos1 << log2Sl; - for( int pos2 = 0; pos2 < diPos2; pos2++ ) - { - const int blkP = pos1 * ofsPos1 + pos2 * ofsPos2; - if( posN > pos2 ) *gtE += abs( coeffs.buf[ blkP ] ); - if( posN < pos2 ) *stE += abs( coeffs.buf[ blkP ] ); - if( posN == pos2 ) diaE += abs( coeffs.buf[ blkP ] ); - } - } - - *horVerRatio = 0 == wdtE && 0 == hgtE ? 1 : double( wdtE ) / double( hgtE ); - *diagRatio = 0 == wdtE && 0 == hgtE && 0 == diaE ? 
1 : double( diaE ) / double( wdtE + hgtE ); - } -} void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const RDPCMMode &mode) { - const bool bLossless = tu.cu->transQuantBypass; const uint32_t uiWidth = tu.blocks[compID].width; const uint32_t uiHeight = tu.blocks[compID].height; const bool rotateResidual = TU::isNonTransformedResidualRotated(tu, compID); @@ -773,16 +1087,8 @@ void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, co Pel reconstructedDelta; - if (bLossless) - { - pcCoeff.buf[coefficientIndex] = encoderSideDelta; - reconstructedDelta = (Pel) encoderSideDelta; - } - else - { m_quant->transformSkipQuantOneSample(tu, compID, encoderSideDelta, pcCoeff.buf[coefficientIndex], coefficientIndex, cQP, bUseHalfRoundingPoint); m_quant->invTrSkipDeQuantOneSample (tu, compID, pcCoeff.buf[coefficientIndex], reconstructedDelta, coefficientIndex, cQP); - } uiAbsSum += abs(pcCoeff.buf[coefficientIndex]); @@ -796,7 +1102,7 @@ void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, co void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, RDPCMMode &rdpcmMode) { - if (!CU::isRDPCMEnabled(*tu.cu) || (tu.mtsIdx!=1 && !tu.cu->transQuantBypass)) + if (!CU::isRDPCMEnabled(*tu.cu) || (tu.mtsIdx[compID] != MTS_SKIP)) { rdpcmMode = RDPCM_OFF; } @@ -860,47 +1166,16 @@ void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpPar void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, TCoeff* psCoeff) { - const SPS &sps = *tu.cs->sps; - const CompArea &rect = tu.blocks[compID]; - const uint32_t width = rect.width; - const uint32_t height = rect.height; - const ChannelType chType = toChannelType(compID); - const int channelBitDepth = sps.getBitDepth(chType); - const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); - int iTransformShift = 
getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); + const CompArea &rect = tu.blocks[compID]; + const uint32_t width = rect.width; + const uint32_t height = rect.height; - if( sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ) + for (uint32_t y = 0, coefficientIndex = 0; y < height; y++) { - iTransformShift = std::max<int>( 0, iTransformShift ); - } - - int iWHScale = 1; - - const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID ); - const uint32_t uiSizeMinus1 = ( width * height ) - 1; - - if( iTransformShift >= 0 ) - { - for( uint32_t y = 0, coefficientIndex = 0; y < height; y++ ) - { - for( uint32_t x = 0; x < width; x++, coefficientIndex++ ) + for (uint32_t x = 0; x < width; x++, coefficientIndex++) { - psCoeff[rotateResidual ? uiSizeMinus1 - coefficientIndex : coefficientIndex] = ( TCoeff( resi.at( x, y ) ) * iWHScale ) << iTransformShift; + psCoeff[ coefficientIndex ] = TCoeff(resi.at(x, y)); } - } - } - else //for very high bit depths - { - iTransformShift = -iTransformShift; - const TCoeff offset = 1 << ( iTransformShift - 1 ); - - for( uint32_t y = 0, coefficientIndex = 0; y < height; y++ ) - { - for( uint32_t x = 0; x < width; x++, coefficientIndex++ ) - { - psCoeff[rotateResidual ? uiSizeMinus1 - coefficientIndex : coefficientIndex] = ( TCoeff( resi.at( x, y ) ) * iWHScale + offset ) >> iTransformShift; - } - } } } diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h index 85964c1c8efa973db6ab9f8a842ba46918dca4b9..50f893da847821036c47ab93c832c6707a2e7ad5 100644 --- a/source/Lib/CommonLib/TrQuant.h +++ b/source/Lib/CommonLib/TrQuant.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -47,6 +47,7 @@ #include "UnitPartitioner.h" #include "Quant.h" +#include "DepQuant.h" //! \ingroup CommonLib //! \{ @@ -69,32 +70,41 @@ public: void init ( const Quant* otherQuant, const uint32_t uiMaxTrSize, - const bool bUseRDOQ = false, - const bool bUseRDOQTS = false, + const bool bUseRDOQ, + const bool bUseRDOQTS, #if T0196_SELECTIVE_RDOQ - const bool useSelectiveRDOQ = false, + const bool useSelectiveRDOQ, #endif - const bool bEnc = false, - const bool useTransformSkipFast = false + const bool bEnc ); + void getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer); - void getTrTypes( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer ); + void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); + void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ); + uint32_t getLFNSTIntraMode( int wideAngPredMode ); + bool getTransposeFlag ( uint32_t intraMode ); protected: + void xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr = false ); + void xInvLfnst( const TransformUnit &tu, const ComponentID compID ); + public: void invTransformNxN (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs); - - void transformNxN ( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand, double* diagRatio = nullptr, double* horVerRatio = nullptr ); - void transformNxN ( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr = false, double* diagRatio = nullptr, double* horVerRatio = nullptr ); + void transformNxN ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand ); + void transformNxN ( 
TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr = false ); void rdpcmNxN (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, RDPCMMode &rdpcmMode); void applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const RDPCMMode &rdpcmMode); void transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &compID, const TCoeff &resiDiff, TCoeff &coeff, const uint32_t &uiPos, const QpParam &cQP, const bool bUseHalfRoundingPoint); void invTrSkipDeQuantOneSample (TransformUnit &tu, const ComponentID &compID, const TCoeff &pcCoeff, Pel &reconSample, const uint32_t &uiPos, const QpParam &cQP); + void invTransformICT ( const TransformUnit &tu, PelBuf &resCb, PelBuf &resCr ); + std::pair<int64_t,int64_t> fwdTransformICT ( const TransformUnit &tu, const PelBuf &resCb, const PelBuf &resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr = -1 ); + std::vector<int> selectICTCandidates ( const TransformUnit &tu, CompStorage* resCb, CompStorage* resCr ); + void invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf &pcResidual); #if RDOQ_CHROMA_LAMBDA void setLambdas ( const double lambdas[MAX_NUM_COMPONENT] ) { m_quant->setLambdas( lambdas ); } @@ -104,24 +114,27 @@ public: void setLambda ( const double dLambda ) { m_quant->setLambda( dLambda ); } double getLambda () const { return m_quant->getLambda(); } - Quant* getQuant() { return m_quant; } - + DepQuant* getQuant() { return m_quant; } + void lambdaAdjustColorTrans(bool forward) { m_quant->lambdaAdjustColorTrans(forward); } + void resetStore() { m_quant->resetStore(); } #if ENABLE_SPLIT_PARALLELISM void copyState( const TrQuant& other ); #endif protected: - TCoeff* m_plTempCoeff; - uint32_t m_uiMaxTrSize; - bool m_bEnc; - bool m_useTransformSkipFast; - - bool m_scalingListEnabledFlag; + TCoeff m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY]; private: - Quant *m_quant; //!< 
Quantizer - TCoeff** m_mtsCoeffs; + DepQuant *m_quant; //!< Quantizer + TCoeff m_mtsCoeffs[NUM_TRAFO_MODES_MTS][MAX_TB_SIZEY * MAX_TB_SIZEY]; + TCoeff m_tempInMatrix [ 48 ]; + TCoeff m_tempOutMatrix[ 48 ]; + static const int maxAbsIctMode = 3; + void (*m_invICTMem[1+2*maxAbsIctMode])(PelBuf&,PelBuf&); + std::pair<int64_t,int64_t>(*m_fwdICTMem[1+2*maxAbsIctMode])(const PelBuf&,const PelBuf&,PelBuf&,PelBuf&); + void (**m_invICT)(PelBuf&,PelBuf&); + std::pair<int64_t,int64_t>(**m_fwdICT)(const PelBuf&,const PelBuf&,PelBuf&,PelBuf&); // forward Transform diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp index 78412a87a27187790b7fdf9362da51d229d4ba10..b21ede257072798dca77ec1513bc525815ff7bb8 100644 --- a/source/Lib/CommonLib/TrQuant_EMT.cpp +++ b/source/Lib/CommonLib/TrQuant_EMT.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/TrQuant_EMT.h b/source/Lib/CommonLib/TrQuant_EMT.h index f636da5c60eec2ac8a94414e25d1d2bd7a919577..d6e6a2a2d207bfc3efebb9fed1c1214239ff13fb 100644 --- a/source/Lib/CommonLib/TrQuant_EMT.h +++ b/source/Lib/CommonLib/TrQuant_EMT.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index ced9c082fbb010645b11783caf2bfd0a6eb17157..2a5138da3c867ead0b38a4e52fa462b27471d583 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,46 +50,32 @@ #include <assert.h> #include <cassert> -#define MMVD_LTRP 1 // MVD scaling for MMVD considering LTRP from JVET-N0332 -#define JCTVC_Y0038_PARAMS 1 +#define JVET_Q0055_MTS_SIGNALLING 1 // JVET-Q0055: Check for transform coefficients outside the 16x16 area +#define JVET_Q0480_RASTER_RECT_SLICES 1 // JVET-Q0480: Eliminate redundant slice height syntax when in raster rectangular slice mode (tile_idx_delta_present_flag == 0) -#define JVET_MMVD_OFF_MACRO 0 +#define JVET_Q0433_MODIFIED_CHROMA_DIST_WEIGHT 1 // modification of chroma distortion weight (as agreed during presentation of JVET-Q0433) -#define FIX_DB_MAX_TRANSFORM_SIZE 1 +#define JVET_Q0487_SCALING_WINDOW_ISSUES 1 // JVET-Q0487: Fix scaling window issues when scaling ratio is 1:1 -#define MRG_SHARELIST_SHARSIZE 32 +#define JVET_AHG14_LOSSLESS 1 +#define JVET_AHG14_LOSSLESS_ENC_QP_FIX 1 && JVET_AHG14_LOSSLESS #define JVET_M0497_MATRIX_MULT 0 // 0: Fast method; 1: Matrix multiplication #define APPLY_SBT_SL_ON_MTS 1 // apply save & load fast algorithm on inter MTS when SBT is on -#define FIX_PCM 1 // Fix PCM bugs in VTM3 - #define MAX_TB_SIZE_SIGNALLING 0 +#define HEVC_SEI 0 // SEI messages that are defined in HEVC, but not in VVC typedef std::pair<int, bool> TrMode; typedef std::pair<int, int> TrCost; // clang-format off -#define ENABLE_JVET_L0283_MRL 1 // 1: Enable MRL, 0: Disable MRL 
-#define JVET_L0090_PAIR_AVG 1 // Add pairwise average candidates, replace HEVC combined candidates -#define REUSE_CU_RESULTS 1 +#define REUSE_CU_RESULTS 1 #if REUSE_CU_RESULTS #define REUSE_CU_RESULTS_WITH_MULTIPLE_TUS 1 -#define MAX_NUM_TUS 4 #endif // clang-format on - -#ifndef JVET_B0051_NON_MPM_MODE -#define JVET_B0051_NON_MPM_MODE ( 1 && JEM_TOOLS ) -#endif -#ifndef QTBT_AS_IN_JEM -#define QTBT_AS_IN_JEM 1 -#endif -#ifndef HEVC_TOOLS -#define HEVC_TOOLS 0 -#endif - #ifndef JVET_J0090_MEMORY_BANDWITH_MEASURE #define JVET_J0090_MEMORY_BANDWITH_MEASURE 0 #endif @@ -98,15 +84,13 @@ typedef std::pair<int, int> TrCost; #define EXTENSION_360_VIDEO 0 ///< extension for 360/spherical video coding support; this macro should be controlled by makefile, as it would be used to control whether the library is built and linked #endif -#ifndef ENABLE_WPP_PARALLELISM -#define ENABLE_WPP_PARALLELISM 0 -#endif -#if ENABLE_WPP_PARALLELISM -#ifndef ENABLE_WPP_STATIC_LINK -#define ENABLE_WPP_STATIC_LINK 0 // bug fix static link +#ifndef EXTENSION_HDRTOOLS +#define EXTENSION_HDRTOOLS 0 //< extension for HDRTools/Metrics support; this macro should be controlled by makefile, as it would be used to control whether the library is built and linked #endif -#define PARL_WPP_MAX_NUM_THREADS 16 +#define JVET_O0756_CONFIG_HDRMETRICS 1 +#if EXTENSION_HDRTOOLS +#define JVET_O0756_CALCULATE_HDRMETRICS 1 #endif #ifndef ENABLE_SPLIT_PARALLELISM #define ENABLE_SPLIT_PARALLELISM 0 @@ -121,9 +105,8 @@ typedef std::pair<int, int> TrCost; // ==================================================================================================================== -// NEXT software switches +// General settings // ==================================================================================================================== -#define K0238_SAO_GREEDY_MERGE_ENCODING 1 #ifndef ENABLE_TRACING #define ENABLE_TRACING 0 // DISABLE by default (enable only when debugging, requires 15% run-time in decoding) -- see 
documentation in 'doc/DTrace for NextSoftware.pdf' @@ -139,49 +122,7 @@ typedef std::pair<int, int> TrCost; #define WCG_EXT 1 #define WCG_WPSNR WCG_EXT -#if HEVC_TOOLS -#define HEVC_USE_INTRA_SMOOTHING_T32 1 -#define HEVC_USE_INTRA_SMOOTHING_T64 1 -#define HEVC_USE_DC_PREDFILTERING 1 -#define HEVC_USE_HOR_VER_PREDFILTERING 1 -#define HEVC_USE_MDCS 1 -#define HEVC_USE_SIGN_HIDING 1 -#define HEVC_USE_SCALING_LISTS 1 -#define HEVC_VPS 1 -#define HEVC_DEPENDENT_SLICES 1 -#define HEVC_TILES_WPP 1 -#else -#define HEVC_USE_SIGN_HIDING 1 -#define HEVC_TILES_WPP 1 -#endif - -#ifndef HEVC_TILES_WPP -#define HEVC_TILES_WPP 1 -#endif -#if !HEVC_TILES_WPP -#error JVET_M0445_MCTS_NEEDS_TILES_ENABLED -#endif - -#define JVET_M0101_HLS 1 // joint HLS syntax - #define KEEP_PRED_AND_RESI_SIGNALS 0 - - -#if QTBT_AS_IN_JEM // macros which will cause changes in the decoder behavior ara marked with *** - keep them on to retain compatibility with JEM-toolcheck -#define HM_QTBT_AS_IN_JEM 1 // *** -#if HM_QTBT_AS_IN_JEM -#define HM_QTBT_AS_IN_JEM_QUANT 1 // *** -#define HM_QTBT_REPRODUCE_FAST_LCTU_BUG 1 -#endif -#define HM_CODED_CU_INFO 1 // like in JEM, when related CU is skipped, it stays like this even if a non skip mode wins... -#define HM_4TAPIF_AS_IN_JEM 1 // *** - PM: condition not well suited for 4-tap interpolation filters -#define HM_MDIS_AS_IN_JEM 1 // *** - PM: not filtering ref. 
samples for 64xn case and using Planar MDIS condition at encoder -#define HM_JEM_CLIP_PEL 1 // *** -#define HM_JEM_MERGE_CANDS 0 // *** - - -#endif//JEM_COMP - // ==================================================================================================================== // Debugging // ==================================================================================================================== @@ -198,6 +139,13 @@ typedef std::pair<int, int> TrCost; #define RExt__DECODER_DEBUG_BIT_STATISTICS 0 ///< 0 (default) = decoder reports as normal, 1 = decoder produces bit usage statistics (will impact decoder run time by up to ~10%) #endif +#ifndef RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS +#define RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS (1 && RExt__DECODER_DEBUG_BIT_STATISTICS ) ///< 0 (default) = decoder reports as normal, 1 = decoder produces max frame bit usage statistics +#endif + +#define TR_ONLY_COEFF_STATS (1 && RExt__DECODER_DEBUG_BIT_STATISTICS ) ///< 0 combine TS and non-TS decoder debug statistics. 1 = separate TS and non-TS decoder debug statistics. +#define EPBINCOUNT_FIX (1 && RExt__DECODER_DEBUG_BIT_STATISTICS ) ///< 0 use count to represent number of calls to decodeBins. 1 = count and bins for EP bins are the same. 
+ #ifndef RExt__DECODER_DEBUG_TOOL_STATISTICS #define RExt__DECODER_DEBUG_TOOL_STATISTICS 0 ///< 0 (default) = decoder reports as normal, 1 = decoder produces tool usage statistics #endif @@ -236,7 +184,7 @@ typedef std::pair<int, int> TrCost; #define ENABLE_SIMD_OPT_AFFINE_ME ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for affine ME, no impact on RD performance #define ENABLE_SIMD_OPT_ALF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for ALF #if ENABLE_SIMD_OPT_BUFFER -#define ENABLE_SIMD_OPT_GBI 1 ///< SIMD optimization for GBi +#define ENABLE_SIMD_OPT_BCW 1 ///< SIMD optimization for Bcw #endif // End of SIMD optimizations @@ -312,12 +260,28 @@ typedef uint32_t Intermediate_UInt; ///< used as intermediate v #endif typedef uint64_t SplitSeries; ///< used to encoded the splits that caused a particular CU size +typedef uint64_t ModeTypeSeries; ///< used to encoded the ModeType at different split depth typedef uint64_t Distortion; ///< distortion measurement // ==================================================================================================================== // Enumeration // ==================================================================================================================== + +enum BDPCMControl +{ + BDPCM_INACTIVE = 0, + BDPCM_LUMAONLY = 1, + BDPCM_LUMACHROMA = 2, +}; + +enum ApsType +{ + ALF_APS = 0, + LMCS_APS = 1, + SCALING_LIST_APS = 2, +}; + enum QuantFlags { Q_INIT = 0x0, @@ -336,13 +300,23 @@ enum TransType DCT2_EMT = 4 }; +enum MTSIdx +{ + MTS_DCT2_DCT2 = 0, + MTS_SKIP = 1, + MTS_DST7_DST7 = 2, + MTS_DCT8_DST7 = 3, + MTS_DST7_DCT8 = 4, + MTS_DCT8_DCT8 = 5 +}; + enum ISPType { NOT_INTRA_SUBPARTITIONS = 0, HOR_INTRA_SUBPARTITIONS = 1, VER_INTRA_SUBPARTITIONS = 2, - NUM_INTRA_SUBPARTITIONS_MODES = 3, - CAN_USE_VER_AND_HORL_SPLITS = 4 + NUM_INTRA_SUBPARTITIONS_MODES = 3, + INTRA_SUBPARTITIONS_RESERVED = 4 }; enum SbtIdx @@ -417,6 +391,20 @@ enum ChannelType MAX_NUM_CHANNEL_TYPE = 2 }; +enum TreeType +{ + TREE_D = 0, 
//default tree status (for single-tree slice, TREE_D means joint tree; for dual-tree I slice, TREE_D means TREE_L for luma and TREE_C for chroma) + TREE_L = 1, //separate tree only contains luma (may split) + TREE_C = 2, //separate tree only contains chroma (not split), to avoid small chroma block +}; + +enum ModeType +{ + MODE_TYPE_ALL = 0, //all modes can try + MODE_TYPE_INTER = 1, //can try inter + MODE_TYPE_INTRA = 2, //can try intra, ibc, palette +}; + #define CH_L CHANNEL_TYPE_LUMA #define CH_C CHANNEL_TYPE_CHROMA @@ -426,6 +414,7 @@ enum ComponentID COMPONENT_Cb = 1, COMPONENT_Cr = 2, MAX_NUM_COMPONENT = 3, + JOINT_CbCr = MAX_NUM_COMPONENT, MAX_NUM_TBLOCKS = MAX_NUM_COMPONENT }; @@ -468,7 +457,8 @@ enum PredMode MODE_INTER = 0, ///< inter-prediction mode MODE_INTRA = 1, ///< intra-prediction mode MODE_IBC = 2, ///< ibc-prediction mode - NUMBER_OF_PREDICTION_MODES = 3, + MODE_PLT = 3, ///< plt-prediction mode + NUMBER_OF_PREDICTION_MODES = 4, }; /// reference list index @@ -573,13 +563,6 @@ enum MvpDir MD_ABOVE_LEFT ///< MVP of above left block }; -enum StoredResidualType -{ - RESIDUAL_RECONSTRUCTED = 0, - RESIDUAL_ENCODER_SIDE = 1, - NUMBER_OF_STORED_RESIDUAL_TYPES = 2 -}; - enum TransformDirection { TRANSFORM_FORWARD = 0, @@ -601,10 +584,8 @@ enum MESearchMethod enum CoeffScanType { SCAN_DIAG = 0, ///< up-right diagonal scan -#if HEVC_USE_MDCS - SCAN_HOR = 1, ///< horizontal first scan - SCAN_VER = 2, ///< vertical first scan -#endif + SCAN_TRAV_HOR = 1, + SCAN_TRAV_VER = 2, SCAN_NUMBER_OF_TYPES }; @@ -615,16 +596,6 @@ enum CoeffScanGroupType SCAN_NUMBER_OF_GROUP_TYPES = 2 }; -enum SignificanceMapContextType -{ - CONTEXT_TYPE_4x4 = 0, - CONTEXT_TYPE_8x8 = 1, - CONTEXT_TYPE_NxN = 2, - CONTEXT_TYPE_SINGLE = 3, - CONTEXT_NUMBER_OF_TYPES = 4 -}; - -#if HEVC_USE_SCALING_LISTS enum ScalingListMode { SCALING_LIST_OFF, @@ -634,7 +605,8 @@ enum ScalingListMode enum ScalingListSize { - SCALING_LIST_2x2 = 0, + SCALING_LIST_1x1 = 0, + SCALING_LIST_2x2, 
SCALING_LIST_4x4, SCALING_LIST_8x8, SCALING_LIST_16x16, @@ -642,23 +614,18 @@ enum ScalingListSize SCALING_LIST_64x64, SCALING_LIST_128x128, SCALING_LIST_SIZE_NUM, - SCALING_LIST_FIRST_CODED = SCALING_LIST_4x4, // smallest scaling coded as High Level Parameter - SCALING_LIST_LAST_CODED = SCALING_LIST_32x32 + //for user define matrix + SCALING_LIST_FIRST_CODED = SCALING_LIST_2x2, + SCALING_LIST_LAST_CODED = SCALING_LIST_64x64 }; -#endif - -// Slice / Slice segment encoding modes -enum SliceConstraint +enum ScalingList1dStartIdx { - NO_SLICES = 0, ///< don't use slices / slice segments - FIXED_NUMBER_OF_CTU = 1, ///< Limit maximum number of largest coding tree units in a slice / slice segments - FIXED_NUMBER_OF_BYTES = 2, ///< Limit maximum number of bytes in a slice / slice segment -#if HEVC_TILES_WPP - FIXED_NUMBER_OF_TILES = 3, ///< slices / slice segments span an integer number of tiles - NUMBER_OF_SLICE_CONSTRAINT_MODES = 4 -#else - NUMBER_OF_SLICE_CONSTRAINT_MODES = 3 -#endif + SCALING_LIST_1D_START_2x2 = 0, + SCALING_LIST_1D_START_4x4 = 2, + SCALING_LIST_1D_START_8x8 = 8, + SCALING_LIST_1D_START_16x16 = 14, + SCALING_LIST_1D_START_32x32 = 20, + SCALING_LIST_1D_START_64x64 = 26, }; // For use with decoded picture hash SEI messages, generated by encoder. @@ -719,13 +686,9 @@ namespace Profile { enum Name { - NONE = 0, - MAIN = 1, - MAIN10 = 2, - MAINSTILLPICTURE = 3, - MAINREXT = 4, - HIGHTHROUGHPUTREXT = 5, - NEXT = 6 + NONE = 0, + MAIN_10 = 1, + MAIN_444_10 = 2 }; } @@ -735,6 +698,7 @@ namespace Level { MAIN = 0, HIGH = 1, + NUMBER_OF_TIERS=2 }; enum Name @@ -805,135 +769,45 @@ enum PPSExtensionFlagIndex // effort can be done without use of macros to alter the names used to indicate the different NAL unit types. 
enum NalUnitType { -#if JVET_M0101_HLS - NAL_UNIT_CODED_SLICE_TRAIL = 0, // 0 - NAL_UNIT_CODED_SLICE_STSA, // 1 + NAL_UNIT_CODED_SLICE_TRAIL = 0, // 0 + NAL_UNIT_CODED_SLICE_STSA, // 1 + NAL_UNIT_CODED_SLICE_RADL, // 2 + NAL_UNIT_CODED_SLICE_RASL, // 3 - //KJS: keep RADL/RASL since there is no real decision on these types yet - NAL_UNIT_CODED_SLICE_RADL, // 2 should be NAL_UNIT_RESERVED_VCL_2, - NAL_UNIT_CODED_SLICE_RASL, // 3 should be NAL_UNIT_RESERVED_VCL_3, - NAL_UNIT_RESERVED_VCL_4, NAL_UNIT_RESERVED_VCL_5, NAL_UNIT_RESERVED_VCL_6, - NAL_UNIT_RESERVED_VCL_7, - - NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 8 - NAL_UNIT_CODED_SLICE_IDR_N_LP, // 9 - NAL_UNIT_CODED_SLICE_CRA, // 10 - - NAL_UNIT_RESERVED_IRAP_VCL11, - NAL_UNIT_RESERVED_IRAP_VCL12, - NAL_UNIT_RESERVED_IRAP_VCL13, - - NAL_UNIT_RESERVED_VCL14, - -#if HEVC_VPS - NAL_UNIT_VPS, // probably not coming back -#else - NAL_UNIT_RESERVED_VCL15, -#endif - - NAL_UNIT_RESERVED_NVCL16, // probably DPS - - NAL_UNIT_SPS, // 17 - NAL_UNIT_PPS, // 18 - NAL_UNIT_APS, // 19 NAL unit type number needs to be reaaranged. 
+ NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 7 + NAL_UNIT_CODED_SLICE_IDR_N_LP, // 8 + NAL_UNIT_CODED_SLICE_CRA, // 9 + NAL_UNIT_CODED_SLICE_GDR, // 10 + + NAL_UNIT_RESERVED_IRAP_VCL_11, + NAL_UNIT_RESERVED_IRAP_VCL_12, + + NAL_UNIT_DPS, // 13 + NAL_UNIT_VPS, // 14 + NAL_UNIT_SPS, // 15 + NAL_UNIT_PPS, // 16 + NAL_UNIT_PREFIX_APS, // 17 + NAL_UNIT_SUFFIX_APS, // 18 + NAL_UNIT_PH, // 19 NAL_UNIT_ACCESS_UNIT_DELIMITER, // 20 NAL_UNIT_EOS, // 21 NAL_UNIT_EOB, // 22 NAL_UNIT_PREFIX_SEI, // 23 NAL_UNIT_SUFFIX_SEI, // 24 - NAL_UNIT_FILLER_DATA, // 25 keep: may be added with HRD - NAL_UNIT_RESERVED_NVCL26, - NAL_UNIT_RESERVED_NVCL27, + NAL_UNIT_FD, // 25 + + NAL_UNIT_RESERVED_NVCL_26, + NAL_UNIT_RESERVED_NVCL_27, + NAL_UNIT_UNSPECIFIED_28, NAL_UNIT_UNSPECIFIED_29, NAL_UNIT_UNSPECIFIED_30, NAL_UNIT_UNSPECIFIED_31, - NAL_UNIT_INVALID, -#else - NAL_UNIT_CODED_SLICE_TRAIL_N = 0, // 0 - NAL_UNIT_CODED_SLICE_TRAIL_R, // 1 - - NAL_UNIT_CODED_SLICE_TSA_N, // 2 - NAL_UNIT_CODED_SLICE_TSA_R, // 3 - - NAL_UNIT_CODED_SLICE_STSA_N, // 4 - NAL_UNIT_CODED_SLICE_STSA_R, // 5 - - NAL_UNIT_CODED_SLICE_RADL_N, // 6 - NAL_UNIT_CODED_SLICE_RADL_R, // 7 - - NAL_UNIT_CODED_SLICE_RASL_N, // 8 - NAL_UNIT_CODED_SLICE_RASL_R, // 9 - - NAL_UNIT_RESERVED_VCL_N10, - NAL_UNIT_RESERVED_VCL_R11, - NAL_UNIT_RESERVED_VCL_N12, - NAL_UNIT_RESERVED_VCL_R13, - NAL_UNIT_RESERVED_VCL_N14, - NAL_UNIT_RESERVED_VCL_R15, - - NAL_UNIT_CODED_SLICE_BLA_W_LP, // 16 - NAL_UNIT_CODED_SLICE_BLA_W_RADL, // 17 - NAL_UNIT_CODED_SLICE_BLA_N_LP, // 18 - NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 19 - NAL_UNIT_CODED_SLICE_IDR_N_LP, // 20 - NAL_UNIT_CODED_SLICE_CRA, // 21 - NAL_UNIT_RESERVED_IRAP_VCL22, - NAL_UNIT_RESERVED_IRAP_VCL23, - - NAL_UNIT_RESERVED_VCL24, - NAL_UNIT_RESERVED_VCL25, - NAL_UNIT_RESERVED_VCL26, - NAL_UNIT_RESERVED_VCL27, - NAL_UNIT_RESERVED_VCL28, - NAL_UNIT_RESERVED_VCL29, - NAL_UNIT_RESERVED_VCL30, - NAL_UNIT_RESERVED_VCL31, - -#if HEVC_VPS - NAL_UNIT_VPS, // 32 -#else - NAL_UNIT_RESERVED_32, -#endif - NAL_UNIT_SPS, // 
33 - NAL_UNIT_PPS, // 34 - NAL_UNIT_APS, //NAL unit type number needs to be reaaranged. - NAL_UNIT_ACCESS_UNIT_DELIMITER, // 35 - NAL_UNIT_EOS, // 36 - NAL_UNIT_EOB, // 37 - NAL_UNIT_FILLER_DATA, // 38 - NAL_UNIT_PREFIX_SEI, // 39 - NAL_UNIT_SUFFIX_SEI, // 40 - - NAL_UNIT_RESERVED_NVCL41, - NAL_UNIT_RESERVED_NVCL42, - NAL_UNIT_RESERVED_NVCL43, - NAL_UNIT_RESERVED_NVCL44, - NAL_UNIT_RESERVED_NVCL45, - NAL_UNIT_RESERVED_NVCL46, - NAL_UNIT_RESERVED_NVCL47, - NAL_UNIT_UNSPECIFIED_48, - NAL_UNIT_UNSPECIFIED_49, - NAL_UNIT_UNSPECIFIED_50, - NAL_UNIT_UNSPECIFIED_51, - NAL_UNIT_UNSPECIFIED_52, - NAL_UNIT_UNSPECIFIED_53, - NAL_UNIT_UNSPECIFIED_54, - NAL_UNIT_UNSPECIFIED_55, - NAL_UNIT_UNSPECIFIED_56, - NAL_UNIT_UNSPECIFIED_57, - NAL_UNIT_UNSPECIFIED_58, - NAL_UNIT_UNSPECIFIED_59, - NAL_UNIT_UNSPECIFIED_60, - NAL_UNIT_UNSPECIFIED_61, - NAL_UNIT_UNSPECIFIED_62, - NAL_UNIT_UNSPECIFIED_63, - NAL_UNIT_INVALID, -#endif + NAL_UNIT_INVALID }; #if SHARP_LUMA_DELTA_QP @@ -945,13 +819,6 @@ enum LumaLevelToDQPMode }; #endif -enum SaveLoadTag -{ - SAVE_LOAD_INIT = 0, - SAVE_ENC_INFO = 1, - LOAD_ENC_INFO = 2 -}; - enum MergeType { MRG_TYPE_DEFAULT_N = 0, // 0 @@ -967,12 +834,6 @@ enum TriangleSplit TRIANGLE_DIR_NUM }; -enum SharedMrgState -{ - NO_SHARE = 0, - GEN_ON_SHARED_BOUND = 1, - SHARING = 2 -}; ////////////////////////////////////////////////////////////////////////// // Encoder modes to try out ////////////////////////////////////////////////////////////////////////// @@ -991,8 +852,9 @@ enum EncModeFeature enum ImvMode { IMV_OFF = 0, - IMV_DEFAULT, + IMV_FPEL, IMV_4PEL, + IMV_HPEL, NUM_IMV_MODES }; @@ -1041,6 +903,12 @@ struct BitDepths int recon[MAX_NUM_CHANNEL_TYPE]; ///< the bit depth as indicated in the SPS }; +enum PLTRunMode +{ + PLT_RUN_INDEX = 0, + PLT_RUN_COPY = 1, + NUM_PLT_RUN = 2 +}; /// parameters for deblocking filter struct LFCUParam { @@ -1357,13 +1225,13 @@ template<typename T> class dynamic_cache { std::vector<T*> m_cache; -#if ENABLE_SPLIT_PARALLELISM || 
ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM int64_t m_cacheId; #endif public: -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM dynamic_cache() { static int cacheId = 0; @@ -1395,7 +1263,7 @@ public: { ret = m_cache.back(); m_cache.pop_back(); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( ret->cacheId != m_cacheId, "Putting item into wrong cache!" ); CHECK( !ret->cacheUsed, "Fetched an element that should've been in cache!!" ); #endif @@ -1405,7 +1273,7 @@ public: ret = new T; } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM ret->cacheId = m_cacheId; ret->cacheUsed = false; @@ -1415,7 +1283,7 @@ public: void cache( T* el ) { -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" ); CHECK( el->cacheUsed, "Putting cached item back into cache!" ); @@ -1427,7 +1295,7 @@ public: void cache( std::vector<T*>& vel ) { -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( auto el : vel ) { CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" ); @@ -1455,137 +1323,6 @@ struct XUCache #define SIGN(x) ( (x) >= 0 ? 
1 : -1 ) -#define MAX_NUM_ALF_CLASSES 25 -#define MAX_NUM_ALF_LUMA_COEFF 13 -#define MAX_NUM_ALF_CHROMA_COEFF 7 -#define MAX_ALF_FILTER_LENGTH 7 -#define MAX_NUM_ALF_COEFF (MAX_ALF_FILTER_LENGTH * MAX_ALF_FILTER_LENGTH / 2 + 1) - -enum AlfFilterType -{ - ALF_FILTER_5, - ALF_FILTER_7, - ALF_NUM_OF_FILTER_TYPES -}; - -struct AlfFilterShape -{ - AlfFilterShape( int size ) - : filterLength( size ), - numCoeff( size * size / 4 + 1 ), - filterSize( size * size / 2 + 1 ) - { - if( size == 5 ) - { - pattern = { - 0, - 1, 2, 3, - 4, 5, 6, 5, 4, - 3, 2, 1, - 0 - }; - - weights = { - 2, - 2, 2, 2, - 2, 2, 1, 1 - }; - - golombIdx = { - 0, - 0, 1, 0, - 0, 1, 2, 2 - }; - - filterType = ALF_FILTER_5; - } - else if( size == 7 ) - { - pattern = { - 0, - 1, 2, 3, - 4, 5, 6, 7, 8, - 9, 10, 11, 12, 11, 10, 9, - 8, 7, 6, 5, 4, - 3, 2, 1, - 0 - }; - - weights = { - 2, - 2, 2, 2, - 2, 2, 2, 2, 2, - 2, 2, 2, 1, 1 - }; - - golombIdx = { - 0, - 0, 1, 0, - 0, 1, 2, 1, 0, - 0, 1, 2, 3, 3 - }; - - filterType = ALF_FILTER_7; - } - else - { - filterType = ALF_NUM_OF_FILTER_TYPES; - CHECK( 0, "Wrong ALF filter shape" ); - } - } - - AlfFilterType filterType; - int filterLength; - int numCoeff; //TO DO: check whether we need both numCoeff and filterSize - int filterSize; - std::vector<int> pattern; - std::vector<int> weights; - std::vector<int> golombIdx; -}; - -struct AlfSliceParam -{ - bool enabledFlag[MAX_NUM_COMPONENT]; // alf_slice_enable_flag, alf_chroma_idc - short lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j] - short chromaCoeff[MAX_NUM_ALF_CHROMA_COEFF]; // alf_coeff_chroma[i] - short filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES]; // filter_coeff_delta[i] - bool alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES]; // alf_luma_coeff_flag[i] - int numLumaFilters; // number_of_filters_minus1 + 1 - bool alfLumaCoeffDeltaFlag; // alf_luma_coeff_delta_flag - bool alfLumaCoeffDeltaPredictionFlag; // alf_luma_coeff_delta_prediction_flag - std::vector<AlfFilterShape>* 
filterShapes; - - AlfSliceParam() - { - reset(); - } - - void reset() - { - std::memset( enabledFlag, false, sizeof( enabledFlag ) ); - std::memset( lumaCoeff, 0, sizeof( lumaCoeff ) ); - std::memset( chromaCoeff, 0, sizeof( chromaCoeff ) ); - std::memset( filterCoeffDeltaIdx, 0, sizeof( filterCoeffDeltaIdx ) ); - std::memset( alfLumaCoeffFlag, true, sizeof( alfLumaCoeffFlag ) ); - numLumaFilters = 1; - alfLumaCoeffDeltaFlag = false; - alfLumaCoeffDeltaPredictionFlag = false; - } - - const AlfSliceParam& operator = ( const AlfSliceParam& src ) - { - std::memcpy( enabledFlag, src.enabledFlag, sizeof( enabledFlag ) ); - std::memcpy( lumaCoeff, src.lumaCoeff, sizeof( lumaCoeff ) ); - std::memcpy( chromaCoeff, src.chromaCoeff, sizeof( chromaCoeff ) ); - std::memcpy( filterCoeffDeltaIdx, src.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) ); - std::memcpy( alfLumaCoeffFlag, src.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) ); - numLumaFilters = src.numLumaFilters; - alfLumaCoeffDeltaFlag = src.alfLumaCoeffDeltaFlag; - alfLumaCoeffDeltaPredictionFlag = src.alfLumaCoeffDeltaPredictionFlag; - filterShapes = src.filterShapes; - return *this; - } -}; //! \} diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 7ca06ebc69661b066d2539bef29f550becdedc50..041b241b8e494577d48561d1af13d8cc07b6e6bb 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -265,26 +265,51 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) mmvdSkip = other.mmvdSkip; affine = other.affine; affineType = other.affineType; + colorTransform = other.colorTransform; triangle = other.triangle; - transQuantBypass = other.transQuantBypass; - ipcm = other.ipcm; + bdpcmMode = other.bdpcmMode; + bdpcmModeChroma = other.bdpcmModeChroma; qp = other.qp; chromaQpAdj = other.chromaQpAdj; rootCbf = other.rootCbf; sbtInfo = other.sbtInfo; -#if HEVC_TILES_WPP + mtsFlag = other.mtsFlag; + lfnstIdx = other.lfnstIdx; tileIdx = other.tileIdx; -#endif imv = other.imv; imvNumCand = other.imvNumCand; - GBiIdx = other.GBiIdx; + BcwIdx = other.BcwIdx; for (int i = 0; i<2; i++) refIdxBi[i] = other.refIdxBi[i]; - shareParentPos = other.shareParentPos; - shareParentSize = other.shareParentSize; smvdMode = other.smvdMode; ispMode = other.ispMode; + mipFlag = other.mipFlag; + + for (int idx = 0; idx < MAX_NUM_CHANNEL_TYPE; idx++) + { + curPLTSize[idx] = other.curPLTSize[idx]; + useEscape[idx] = other.useEscape[idx]; + useRotation[idx] = other.useRotation[idx]; + reusePLTSize[idx] = other.reusePLTSize[idx]; + lastPLTSize[idx] = other.lastPLTSize[idx]; + if (slice->getSPS()->getPLTMode()) + { + memcpy(reuseflag[idx], other.reuseflag[idx], MAXPLTPREDSIZE * sizeof(bool)); + } + } + + if (slice->getSPS()->getPLTMode()) + { + for (int idx = 0; idx < MAX_NUM_COMPONENT; idx++) + { + memcpy(curPLT[idx], other.curPLT[idx], MAXPLTSIZE * sizeof(Pel)); + } + } + + treeType = other.treeType; + modeType = other.modeType; + modeTypeSeries = other.modeTypeSeries; return *this; } @@ -300,26 +325,121 @@ void CodingUnit::initData() mmvdSkip = false; affine = false; affineType = 0; + colorTransform = false; triangle = false; - transQuantBypass = false; - ipcm = false; + bdpcmMode = 0; + bdpcmModeChroma = 0; qp = 0; chromaQpAdj = 0; rootCbf = true; sbtInfo = 0; -#if HEVC_TILES_WPP + mtsFlag = 0; + 
lfnstIdx = 0; tileIdx = 0; -#endif imv = 0; imvNumCand = 0; - GBiIdx = GBI_DEFAULT; + BcwIdx = BCW_DEFAULT; for (int i = 0; i < 2; i++) refIdxBi[i] = -1; - shareParentPos = Position(-1, -1); - shareParentSize.width = -1; - shareParentSize.height = -1; smvdMode = 0; ispMode = 0; + mipFlag = false; + + for (int idx = 0; idx < MAX_NUM_CHANNEL_TYPE; idx++) + { + curPLTSize[idx] = 0; + reusePLTSize[idx] = 0; + lastPLTSize[idx] = 0; + useEscape[idx] = false; + useRotation[idx] = false; + memset(reuseflag[idx], false, MAXPLTPREDSIZE * sizeof(bool)); + } + + for (int idx = 0; idx < MAX_NUM_COMPONENT; idx++) + { + memset(curPLT[idx], 0, MAXPLTSIZE * sizeof(Pel)); + } + + treeType = TREE_D; + modeType = MODE_TYPE_ALL; + modeTypeSeries = 0; +} + +const bool CodingUnit::isSepTree() const +{ + return treeType != TREE_D || CS::isDualITree( *cs ); +} + +const bool CodingUnit::checkCCLMAllowed() const +{ + bool allowCCLM = false; + + if( !CS::isDualITree( *cs ) ) //single tree I slice or non-I slice (Note: judging chType is no longer equivalent to checking dual-tree I slice since the local dual-tree is introduced) + { + allowCCLM = true; + } + else if( slice->getSPS()->getCTUSize() <= 32 ) //dual tree, CTUsize < 64 + { + allowCCLM = true; + } + else //dual tree, CTU size 64 or 128 + { + int depthFor64x64Node = slice->getSPS()->getCTUSize() == 128 ? 
1 : 0; + const PartSplit cuSplitTypeDepth1 = CU::getSplitAtDepth( *this, depthFor64x64Node ); + const PartSplit cuSplitTypeDepth2 = CU::getSplitAtDepth( *this, depthFor64x64Node + 1 ); + + //allow CCLM if 64x64 chroma tree node uses QT split or HBT+VBT split combination + if( cuSplitTypeDepth1 == CU_QUAD_SPLIT || (cuSplitTypeDepth1 == CU_HORZ_SPLIT && cuSplitTypeDepth2 == CU_VERT_SPLIT) ) + { + if( chromaFormat == CHROMA_420 ) + { + CHECK( !(blocks[COMPONENT_Cb].width <= 16 && blocks[COMPONENT_Cb].height <= 16), "chroma cu size shall be <= 16x16 for YUV420 format" ); + } + allowCCLM = true; + } + //allow CCLM if 64x64 chroma tree node uses NS (No Split) and becomes a chroma CU containing 32x32 chroma blocks + else if( cuSplitTypeDepth1 == CU_DONT_SPLIT ) + { + if( chromaFormat == CHROMA_420 ) + { + CHECK( !(blocks[COMPONENT_Cb].width == 32 && blocks[COMPONENT_Cb].height == 32), "chroma cu size shall be 32x32 for YUV420 format" ); + } + allowCCLM = true; + } + //allow CCLM if 64x32 chroma tree node uses NS and becomes a chroma CU containing 32x16 chroma blocks + else if( cuSplitTypeDepth1 == CU_HORZ_SPLIT && cuSplitTypeDepth2 == CU_DONT_SPLIT ) + { + if( chromaFormat == CHROMA_420 ) + { + CHECK( !(blocks[COMPONENT_Cb].width == 32 && blocks[COMPONENT_Cb].height == 16), "chroma cu size shall be 32x16 for YUV420 format" ); + } + allowCCLM = true; + } + + //further check luma conditions + if( allowCCLM ) + { + //disallow CCLM if luma 64x64 block uses BT or TT or NS with ISP + const Position lumaRefPos( chromaPos().x << getComponentScaleX( COMPONENT_Cb, chromaFormat ), chromaPos().y << getComponentScaleY( COMPONENT_Cb, chromaFormat ) ); + const CodingUnit* colLumaCu = cs->picture->cs->getCU( lumaRefPos, CHANNEL_TYPE_LUMA ); + + if( colLumaCu->lwidth() < 64 || colLumaCu->lheight() < 64 ) //further split at 64x64 luma node + { + const PartSplit cuSplitTypeDepth1Luma = CU::getSplitAtDepth( *colLumaCu, depthFor64x64Node ); + CHECK( !(cuSplitTypeDepth1Luma >= CU_QUAD_SPLIT && 
cuSplitTypeDepth1Luma <= CU_TRIV_SPLIT), "split mode shall be BT, TT or QT" ); + if( cuSplitTypeDepth1Luma != CU_QUAD_SPLIT ) + { + allowCCLM = false; + } + } + else if( colLumaCu->lwidth() == 64 && colLumaCu->lheight() == 64 && colLumaCu->ispMode ) //not split at 64x64 luma node and use ISP mode + { + allowCCLM = false; + } + } + } + + return allowCCLM; } const uint8_t CodingUnit::checkAllowedSbt() const @@ -330,11 +450,15 @@ const uint8_t CodingUnit::checkAllowedSbt() const } //check on prediction mode - if( predMode == MODE_INTRA || predMode == MODE_IBC ) //intra or IBC + if (predMode == MODE_INTRA || predMode == MODE_IBC || predMode == MODE_PLT ) //intra, palette or IBC + { + return 0; + } + if( firstPU->ciipFlag ) { return 0; } - if( firstPU->mhIntraFlag ) + if( triangle ) { return 0; } @@ -346,7 +470,7 @@ const uint8_t CodingUnit::checkAllowedSbt() const memset( allow_type, false, NUMBER_SBT_IDX * sizeof( bool ) ); //parameter - int maxSbtCUSize = cs->sps->getMaxSbtSize(); + int maxSbtCUSize = cs->sps->getMaxTbSize(); int minSbtCUSize = 1 << ( MIN_CU_LOG2 + 1 ); //check on size @@ -397,10 +521,12 @@ void PredictionUnit::initData() // intra data - need this default initialization for PCM intraDir[0] = DC_IDX; intraDir[1] = PLANAR_IDX; + mipTransposedFlag = false; multiRefIdx = 0; // inter data mergeFlag = false; + regularMergeFlag = false; mergeIdx = MAX_UCHAR; triangleSplitDir = MAX_UCHAR; triangleMergeIdx0 = MAX_UCHAR; @@ -432,10 +558,7 @@ void PredictionUnit::initData() mvAffi[i][j].setZero(); } } - mhIntraFlag = false; - shareParentPos = Position(-1, -1); - shareParentSize.width = -1; - shareParentSize.height = -1; + ciipFlag = false; mmvdEncOptMode = 0; } @@ -445,6 +568,7 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData) { intraDir[i] = predData.intraDir[i]; } + mipTransposedFlag = predData.mipTransposedFlag; multiRefIdx = predData.multiRefIdx; return *this; @@ -453,6 +577,7 @@ PredictionUnit& 
PredictionUnit::operator=(const IntraPredictionData& predData) PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData) { mergeFlag = predData.mergeFlag; + regularMergeFlag = predData.regularMergeFlag; mergeIdx = predData.mergeIdx; triangleSplitDir = predData.triangleSplitDir ; triangleMergeIdx0 = predData.triangleMergeIdx0 ; @@ -484,9 +609,7 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData) mvAffi[i][j] = predData.mvAffi[i][j]; } } - mhIntraFlag = predData.mhIntraFlag; - shareParentPos = predData.shareParentPos; - shareParentSize = predData.shareParentSize; + ciipFlag = predData.ciipFlag; return *this; } @@ -496,9 +619,11 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) { intraDir[ i ] = other.intraDir[ i ]; } + mipTransposedFlag = other.mipTransposedFlag; multiRefIdx = other.multiRefIdx; mergeFlag = other.mergeFlag; + regularMergeFlag = other.regularMergeFlag; mergeIdx = other.mergeIdx; triangleSplitDir = other.triangleSplitDir ; triangleMergeIdx0 = other.triangleMergeIdx0 ; @@ -530,9 +655,7 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) mvAffi[i][j] = other.mvAffi[i][j]; } } - mhIntraFlag = other.mhIntraFlag; - shareParentPos = other.shareParentPos; - shareParentSize = other.shareParentSize; + ciipFlag = other.ciipFlag; return *this; } @@ -583,6 +706,11 @@ TransformUnit::TransformUnit(const UnitArea& unit) : UnitArea(unit), cu(nullptr) m_pcmbuf[i] = nullptr; } + for (unsigned i = 0; i < MAX_NUM_TBLOCKS - 1; i++) + { + m_runType[i] = nullptr; + } + initData(); } @@ -594,6 +722,11 @@ TransformUnit::TransformUnit(const ChromaFormat _chromaFormat, const Area &_area m_pcmbuf[i] = nullptr; } + for (unsigned i = 0; i < MAX_NUM_TBLOCKS - 1; i++) + { + m_runType[i] = nullptr; + } + initData(); } @@ -604,14 +737,14 @@ void TransformUnit::initData() cbf[i] = 0; rdpcm[i] = NUMBER_OF_RDPCM_MODES; compAlpha[i] = 0; + mtsIdx[i] = MTS_DCT2_DCT2; } depth = 0; - mtsIdx = 
0; noResidual = false; + jointCbCr = 0; m_chromaResScaleInv = 0; } - -void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf) +void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf, bool **runType) { uint32_t numBlocks = getNumberValidTBlocks(*cs->pcv); @@ -620,6 +753,11 @@ void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf) m_coeffs[i] = coeffs[i]; m_pcmbuf[i] = pcmbuf[i]; } + + for (uint32_t i = 0; i < numBlocks - 1; i++) + { + m_runType[i] = runType[i]; + } } TransformUnit& TransformUnit::operator=(const TransformUnit& other) @@ -635,14 +773,18 @@ TransformUnit& TransformUnit::operator=(const TransformUnit& other) if (m_coeffs[i] && other.m_coeffs[i] && m_coeffs[i] != other.m_coeffs[i]) memcpy(m_coeffs[i], other.m_coeffs[i], sizeof(TCoeff) * area); if (m_pcmbuf[i] && other.m_pcmbuf[i] && m_pcmbuf[i] != other.m_pcmbuf[i]) memcpy(m_pcmbuf[i], other.m_pcmbuf[i], sizeof(Pel ) * area); - + if (cu->slice->getSPS()->getPLTMode() && i < 2) + { + if (m_runType[i] && other.m_runType[i] && m_runType[i] != other.m_runType[i] ) memcpy(m_runType[i], other.m_runType[i], sizeof(bool) * area); + } cbf[i] = other.cbf[i]; rdpcm[i] = other.rdpcm[i]; compAlpha[i] = other.compAlpha[i]; + mtsIdx[i] = other.mtsIdx[i]; } depth = other.depth; - mtsIdx = other.mtsIdx; noResidual = other.noResidual; + jointCbCr = other.jointCbCr; return *this; } @@ -656,14 +798,19 @@ void TransformUnit::copyComponentFrom(const TransformUnit& other, const Componen if (m_coeffs[i] && other.m_coeffs[i] && m_coeffs[i] != other.m_coeffs[i]) memcpy(m_coeffs[i], other.m_coeffs[i], sizeof(TCoeff) * area); if (m_pcmbuf[i] && other.m_pcmbuf[i] && m_pcmbuf[i] != other.m_pcmbuf[i]) memcpy(m_pcmbuf[i], other.m_pcmbuf[i], sizeof(Pel ) * area); + if ((i == COMPONENT_Y || i == COMPONENT_Cb)) + { + if (m_runType[i] && other.m_runType[i] && m_runType[i] != other.m_runType[i]) memcpy(m_runType[i], other.m_runType[i], sizeof(bool) * area); + } cbf[i] = other.cbf[i]; rdpcm[i] = other.rdpcm[i]; compAlpha[i] = 
other.compAlpha[i]; depth = other.depth; - mtsIdx = isLuma( i ) ? other.mtsIdx : mtsIdx; + mtsIdx[i] = other.mtsIdx[i]; noResidual = other.noResidual; + jointCbCr = isChroma( i ) ? other.jointCbCr : jointCbCr; } CoeffBuf TransformUnit::getCoeffs(const ComponentID id) { return CoeffBuf(m_coeffs[id], blocks[id]); } @@ -672,6 +819,18 @@ const CCoeffBuf TransformUnit::getCoeffs(const ComponentID id) const { return CC PelBuf TransformUnit::getPcmbuf(const ComponentID id) { return PelBuf (m_pcmbuf[id], blocks[id]); } const CPelBuf TransformUnit::getPcmbuf(const ComponentID id) const { return CPelBuf (m_pcmbuf[id], blocks[id]); } + PelBuf TransformUnit::getcurPLTIdx(const ComponentID id) { return PelBuf(m_pcmbuf[id], blocks[id]); } +const CPelBuf TransformUnit::getcurPLTIdx(const ComponentID id) const { return CPelBuf(m_pcmbuf[id], blocks[id]); } + + PLTtypeBuf TransformUnit::getrunType (const ComponentID id) { return PLTtypeBuf(m_runType[id], blocks[id]); } +const CPLTtypeBuf TransformUnit::getrunType (const ComponentID id) const { return CPLTtypeBuf(m_runType[id], blocks[id]); } + + PLTescapeBuf TransformUnit::getescapeValue(const ComponentID id) { return PLTescapeBuf(m_coeffs[id], blocks[id]); } +const CPLTescapeBuf TransformUnit::getescapeValue(const ComponentID id) const { return CPLTescapeBuf(m_coeffs[id], blocks[id]); } + + Pel* TransformUnit::getPLTIndex (const ComponentID id) { return m_pcmbuf[id]; } + bool* TransformUnit::getRunTypes (const ComponentID id) { return m_runType[id]; } + void TransformUnit::checkTuNoResidual( unsigned idx ) { if( CU::getSbtIdx( cu->sbtInfo ) == SBT_OFF_DCT ) @@ -684,5 +843,23 @@ void TransformUnit::checkTuNoResidual( unsigned idx ) noResidual = true; } } + +int TransformUnit::getTbAreaAfterCoefZeroOut(ComponentID compID) const +{ + int tbArea = blocks[compID].width * blocks[compID].height; + int tbZeroOutWidth = blocks[compID].width; + int tbZeroOutHeight = blocks[compID].height; + + if ( cs->sps->getUseMTS() && cu->sbtInfo != 0 && 
blocks[compID].width <= 32 && blocks[compID].height <= 32 && compID == COMPONENT_Y ) + { + tbZeroOutWidth = (blocks[compID].width == 32) ? 16 : tbZeroOutWidth; + tbZeroOutHeight = (blocks[compID].height == 32) ? 16 : tbZeroOutHeight; + } + tbZeroOutWidth = std::min<int>(JVET_C0024_ZERO_OUT_TH, tbZeroOutWidth); + tbZeroOutHeight = std::min<int>(JVET_C0024_ZERO_OUT_TH, tbZeroOutHeight); + tbArea = tbZeroOutWidth * tbZeroOutHeight; + return tbArea; +} + int TransformUnit::getChromaAdj() const { return m_chromaResScaleInv; } void TransformUnit::setChromaAdj(int i) { m_chromaResScaleInv = i; } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 69542a605ebba55693780746e2b43ec17b4dc603..91130a7221d62eb12ddbc15dbdbfa5d5caccd706 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,7 +48,10 @@ // --------------------------------------------------------------------------- // tools // --------------------------------------------------------------------------- - +struct PLTBuf { + uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE]; + Pel curPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; +}; inline Position recalcPosition(const ChromaFormat _cf, const ComponentID srcCId, const ComponentID dstCId, const Position &pos) { if( toChannelType( srcCId ) == toChannelType( dstCId ) ) @@ -267,9 +270,8 @@ struct UnitAreaRelative : public UnitArea }; class SPS; -#if HEVC_VPS class VPS; -#endif +class DPS; class PPS; class Slice; @@ -299,27 +301,38 @@ struct CodingUnit : public UnitArea int8_t chromaQpAdj; int8_t qp; SplitSeries splitSeries; + TreeType treeType; + ModeType modeType; + ModeTypeSeries modeTypeSeries; bool skip; bool mmvdSkip; bool affine; int affineType; + bool colorTransform; bool triangle; - bool transQuantBypass; - bool ipcm; + int bdpcmMode; + int bdpcmModeChroma; uint8_t imv; bool rootCbf; uint8_t sbtInfo; -#if HEVC_TILES_WPP uint32_t tileIdx; -#endif - uint8_t GBiIdx; + uint8_t mtsFlag; + uint32_t lfnstIdx; + uint8_t BcwIdx; int refIdxBi[2]; + bool mipFlag; + // needed for fast imv mode decisions int8_t imvNumCand; - Position shareParentPos; - Size shareParentSize; uint8_t smvdMode; uint8_t ispMode; + bool useEscape[MAX_NUM_CHANNEL_TYPE]; + bool useRotation[MAX_NUM_CHANNEL_TYPE]; + bool reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE]; + uint8_t lastPLTSize[MAX_NUM_CHANNEL_TYPE]; + uint8_t reusePLTSize[MAX_NUM_CHANNEL_TYPE]; + uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE]; + Pel curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE]; CodingUnit() : chType( CH_L ) { } CodingUnit(const UnitArea &unit); @@ -337,7 +350,7 @@ struct CodingUnit : public UnitArea TransformUnit *firstTU; TransformUnit *lastTU; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM int64_t 
cacheId; bool cacheUsed; @@ -348,6 +361,10 @@ struct CodingUnit : public UnitArea void setSbtPos( uint8_t pos ) { CHECK( pos >= 4, "sbt_pos wrong" ); sbtInfo = ( pos << 4 ) + ( sbtInfo & 0xcf ); } uint8_t getSbtTuSplit() const; const uint8_t checkAllowedSbt() const; + const bool checkCCLMAllowed() const; + const bool isSepTree() const; + const bool isConsInter() const { return modeType == MODE_TYPE_INTER; } + const bool isConsIntra() const { return modeType == MODE_TYPE_INTRA; } }; // --------------------------------------------------------------------------- @@ -357,12 +374,14 @@ struct CodingUnit : public UnitArea struct IntraPredictionData { uint32_t intraDir[MAX_NUM_CHANNEL_TYPE]; + bool mipTransposedFlag; int multiRefIdx; }; struct InterPredictionData { bool mergeFlag; + bool regularMergeFlag; uint8_t mergeIdx; uint8_t triangleSplitDir; uint8_t triangleMergeIdx0; @@ -380,10 +399,8 @@ struct InterPredictionData Mv mvdL0SubPu[MAX_NUM_SUBCU_DMVR]; Mv mvdAffi [NUM_REF_PIC_LIST_01][3]; Mv mvAffi[NUM_REF_PIC_LIST_01][3]; - bool mhIntraFlag; + bool ciipFlag; - Position shareParentPos; - Size shareParentSize; Mv bv; // block vector for IBC Mv bvd; // block vector difference for IBC uint8_t mmvdEncOptMode; // 0: no action 1: skip chroma MC for MMVD candidate pre-selection 2: skip chroma MC and BIO for MMVD candidate pre-selection @@ -408,8 +425,6 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte PredictionUnit& operator=(const MotionInfo& mi); unsigned idx; - Position shareParentPos; - Size shareParentSize; PredictionUnit *next; @@ -419,7 +434,7 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte MotionBuf getMotionBuf(); CMotionBuf getMotionBuf() const; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM int64_t cacheId; bool cacheUsed; @@ -438,8 +453,9 @@ struct TransformUnit : public UnitArea int m_chromaResScaleInv; uint8_t depth; - uint8_t mtsIdx; + uint8_t mtsIdx [ 
MAX_NUM_TBLOCKS ]; bool noResidual; + uint8_t jointCbCr; uint8_t cbf [ MAX_NUM_TBLOCKS ]; RDPCMMode rdpcm [ MAX_NUM_TBLOCKS ]; int8_t compAlpha [ MAX_NUM_TBLOCKS ]; @@ -453,12 +469,12 @@ struct TransformUnit : public UnitArea unsigned idx; TransformUnit *next; TransformUnit *prev; - - void init(TCoeff **coeffs, Pel **pcmbuf); + void init(TCoeff **coeffs, Pel **pcmbuf, bool **runType); TransformUnit& operator=(const TransformUnit& other); void copyComponentFrom (const TransformUnit& other, const ComponentID compID); void checkTuNoResidual( unsigned idx ); + int getTbAreaAfterCoefZeroOut(ComponentID compID) const; CoeffBuf getCoeffs(const ComponentID id); const CCoeffBuf getCoeffs(const ComponentID id) const; @@ -466,8 +482,16 @@ struct TransformUnit : public UnitArea const CPelBuf getPcmbuf(const ComponentID id) const; int getChromaAdj( ) const; void setChromaAdj(int i); - -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM + PelBuf getcurPLTIdx(const ComponentID id); + const CPelBuf getcurPLTIdx(const ComponentID id) const; + PLTtypeBuf getrunType(const ComponentID id); + const CPLTtypeBuf getrunType(const ComponentID id) const; + PLTescapeBuf getescapeValue(const ComponentID id); + const CPLTescapeBuf getescapeValue(const ComponentID id) const; + Pel* getPLTIndex(const ComponentID id); + bool* getRunTypes(const ComponentID id); + +#if ENABLE_SPLIT_PARALLELISM int64_t cacheId; bool cacheUsed; @@ -475,6 +499,7 @@ struct TransformUnit : public UnitArea private: TCoeff *m_coeffs[ MAX_NUM_TBLOCKS ]; Pel *m_pcmbuf[ MAX_NUM_TBLOCKS ]; + bool *m_runType[ MAX_NUM_TBLOCKS - 1 ]; }; // --------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp index 36c65850bbee50ca31c0d0974a9c94f984ee8f53..71b35c0b274b80d1913d2f3705fb67d415048ae5 100644 --- a/source/Lib/CommonLib/UnitPartitioner.cpp +++ b/source/Lib/CommonLib/UnitPartitioner.cpp @@ -3,7 +3,7 @@ * and 
contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ PartLevel::PartLevel() , canQtSplit ( true ) , qgEnable ( true ) , qgChromaEnable ( true ) +, modeType ( MODE_TYPE_ALL ) { } @@ -69,6 +70,7 @@ PartLevel::PartLevel( const PartSplit _split, const Partitioning& _parts ) , canQtSplit ( true ) , qgEnable ( true ) , qgChromaEnable ( true ) +, modeType ( MODE_TYPE_ALL ) { } @@ -83,6 +85,7 @@ PartLevel::PartLevel( const PartSplit _split, Partitioning&& _parts ) , canQtSplit ( true ) , qgEnable ( true ) , qgChromaEnable ( true ) +, modeType ( MODE_TYPE_ALL ) { } @@ -106,6 +109,27 @@ SplitSeries Partitioner::getSplitSeries() const return splitSeries; } +ModeTypeSeries Partitioner::getModeTypeSeries() const +{ + ModeTypeSeries modeTypeSeries = 0; + int depth = 0; + + for( const auto &level : m_partStack ) + { + if( level.split == CTU_LEVEL ) continue; + else modeTypeSeries += static_cast<int>(level.modeType) << (depth * 3); + + depth++; + } + + return modeTypeSeries; +} + +bool Partitioner::isSepTree( const CodingStructure &cs ) +{ + return treeType != TREE_D || CS::isDualITree( cs ); +} + void Partitioner::setCUData( CodingUnit& cu ) { cu.depth = currDepth; @@ -113,6 +137,7 @@ void Partitioner::setCUData( CodingUnit& cu ) cu.mtDepth = currMtDepth; cu.qtDepth = currQtDepth; cu.splitSeries = getSplitSeries(); + cu.modeTypeSeries = getModeTypeSeries(); } void Partitioner::copyState( const Partitioner& other ) @@ -141,22 +166,15 @@ void Partitioner::copyState( const Partitioner& other ) void AdaptiveDepthPartitioner::setMaxMinDepth( unsigned& minDepth, unsigned& maxDepth, const CodingStructure& cs ) const { unsigned stdMinDepth = 0; - unsigned stdMaxDepth = ( g_aucLog2[cs.sps->getCTUSize()] - g_aucLog2[cs.sps->getMinQTSize( 
cs.slice->getSliceType(), chType )]); + unsigned stdMaxDepth = ( floorLog2(cs.sps->getCTUSize()) - floorLog2(cs.sps->getMinQTSize( cs.slice->getSliceType(), chType ))); const Position pos = currArea().blocks[chType].pos(); const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx(); -#if HEVC_TILES_WPP - const unsigned curTileIdx = cs.picture->tileMap->getTileIdxMap( currArea().lumaPos() ); - - const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, curTileIdx, chType ); - const CodingUnit* cuBelowLeft = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), curSliceIdx, curTileIdx, chType ); - const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, curTileIdx, chType ); - const CodingUnit* cuAboveRight = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), curSliceIdx, curTileIdx, chType ); -#else - const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, chType ); - const CodingUnit* cuBelowLeft = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), curSliceIdx, chType ); - const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, chType ); - const CodingUnit* cuAboveRight = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), curSliceIdx, chType ); -#endif + const unsigned curTileIdx = cs.pps->getTileIdx( currArea().lumaPos() ); + + const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), pos, curSliceIdx, curTileIdx, chType ); + const CodingUnit* cuBelowLeft = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), pos, curSliceIdx, curTileIdx, chType ); + const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), pos, curSliceIdx, curTileIdx, chType ); + const CodingUnit* cuAboveRight = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), pos, curSliceIdx, curTileIdx, chType ); minDepth = stdMaxDepth; maxDepth = stdMinDepth; @@ -241,6 
+259,8 @@ void QTBTPartitioner::initCtu( const UnitArea& ctuArea, const ChannelType _chTyp m_partStack.clear(); m_partStack.push_back( PartLevel( CTU_LEVEL, Partitioning{ ctuArea } ) ); + treeType = TREE_D; + modeType = MODE_TYPE_ALL; } void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructure& cs ) @@ -256,14 +276,17 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur { case CU_QUAD_SPLIT: m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs ) ) ); + m_partStack.back().modeType = modeType; break; case CU_HORZ_SPLIT: case CU_VERT_SPLIT: m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs, split ) ) ); + m_partStack.back().modeType = modeType; break; case CU_TRIH_SPLIT: case CU_TRIV_SPLIT: m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs, split ) ) ); + m_partStack.back().modeType = modeType; break; case TU_MAX_TR_SPLIT: m_partStack.push_back( PartLevel( split, PartitionerImpl::getMaxTuTiling( currArea(), cs ) ) ); @@ -325,8 +348,8 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur currQtDepth++; currSubdiv++; } - qgEnable &= (currSubdiv <= cs.pps->getCuQpDeltaSubdiv()); - qgChromaEnable &= (currSubdiv <= cs.pps->getPpsRangeExtension().getCuChromaQpOffsetSubdiv()); + qgEnable &= (currSubdiv <= cs.slice->getCuQpDeltaSubdiv()); + qgChromaEnable &= (currSubdiv <= cs.slice->getCuChromaQpOffsetSubdiv()); m_partStack.back().qgEnable = qgEnable; m_partStack.back().qgChromaEnable = qgChromaEnable; if (qgEnable) @@ -351,6 +374,7 @@ void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca // the minimal and maximal sizes are given in luma samples const CompArea& area = currArea().Y(); + const CompArea& areaC = currArea().Cb(); PartLevel& level = m_partStack.back(); const PartSplit lastSplit = level.split; @@ -359,14 +383,19 @@ void 
QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca // don't allow QT-splitting below a BT split if( lastSplit != CTU_LEVEL && lastSplit != CU_QUAD_SPLIT ) canQt = false; if( area.width <= minQtSize ) canQt = false; - + if( chType == CHANNEL_TYPE_CHROMA && areaC.width <= MIN_DUALTREE_CHROMA_WIDTH ) canQt = false; + if( treeType == TREE_C ) + { + canQt = canBh = canTh = canBv = canTv = false; + return; + } if( implicitSplit != CU_DONT_SPLIT ) { canNo = canTh = canTv = false; canBh = implicitSplit == CU_HORZ_SPLIT; canBv = implicitSplit == CU_VERT_SPLIT; - + if (chType == CHANNEL_TYPE_CHROMA && areaC.width == 4) canBv = false; return; } @@ -394,30 +423,34 @@ void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca return; } + if( area.width > maxBtSize || area.height > maxBtSize ) + { + canBh = canBv = false; + } + // specific check for BT splits - if( area.height <= minBtSize || area.height > maxBtSize ) canBh = false; + if( area.height <= minBtSize ) canBh = false; if( area.width > MAX_TB_SIZEY && area.height <= MAX_TB_SIZEY ) canBh = false; - - if( area.width <= minBtSize || area.width > maxBtSize ) canBv = false; + if( chType == CHANNEL_TYPE_CHROMA && areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE ) canBh = false; + if( area.width <= minBtSize ) canBv = false; if( area.width <= MAX_TB_SIZEY && area.height > MAX_TB_SIZEY ) canBv = false; - + if (chType == CHANNEL_TYPE_CHROMA && (areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE || areaC.width == 4)) canBv = false; + if( modeType == MODE_TYPE_INTER && area.width * area.height == 32 ) canBv = canBh = false; if( area.height <= 2 * minTtSize || area.height > maxTtSize || area.width > maxTtSize ) canTh = false; if( area.width > MAX_TB_SIZEY || area.height > MAX_TB_SIZEY ) canTh = false; - + if( chType == CHANNEL_TYPE_CHROMA && areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE*2 ) canTh = false; if( area.width <= 2 * minTtSize || area.width > maxTtSize || 
area.height > maxTtSize ) canTv = false; if( area.width > MAX_TB_SIZEY || area.height > MAX_TB_SIZEY ) canTv = false; + if (chType == CHANNEL_TYPE_CHROMA && (areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE * 2 || areaC.width == 8)) canTv = false; + if( modeType == MODE_TYPE_INTER && area.width * area.height == 64 ) canTv = canTh = false; } bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs ) { const CompArea area = currArea().Y(); -#if MAX_TB_SIZE_SIGNALLING const unsigned maxTrSize = cs.sps->getMaxTbSize(); -#else - const unsigned maxTrSize = MAX_TB_SIZEY; -#endif bool canNo, canQt, canBh, canTh, canBv, canTv; @@ -723,16 +756,6 @@ bool TUIntraSubPartitioner::canSplit( const PartSplit split, const CodingStructu } } - -////////////////////////////////////////////////////////////////////////// -// PartitionerFactory -////////////////////////////////////////////////////////////////////////// - -Partitioner* PartitionerFactory::get( const Slice& slice ) -{ - return new QTBTPartitioner; -} - ////////////////////////////////////////////////////////////////////////// // Partitioner methods describing the actual partitioning logic ////////////////////////////////////////////////////////////////////////// @@ -935,11 +958,11 @@ void PartitionerImpl::getTUIntraSubPartitions( Partitioning &sub, const UnitArea uint32_t nPartitions; uint32_t splitDimensionSize = CU::getISPSplitDim( tuArea.lumaSize().width, tuArea.lumaSize().height, splitType ); - bool isDualTree = CS::isDualITree( cs ); + bool isDualTree = CS::isDualITree( cs ) || cs.treeType != TREE_D; if( splitType == TU_1D_HORZ_SPLIT ) { - nPartitions = tuArea.lumaSize().height >> g_aucLog2[splitDimensionSize]; + nPartitions = tuArea.lumaSize().height >> floorLog2(splitDimensionSize); sub.resize( nPartitions ); @@ -956,7 +979,7 @@ void PartitionerImpl::getTUIntraSubPartitions( Partitioning &sub, const UnitArea } else if( splitType == TU_1D_VERT_SPLIT ) { - nPartitions = 
tuArea.lumaSize().width >> g_aucLog2[splitDimensionSize]; + nPartitions = tuArea.lumaSize().width >> floorLog2(splitDimensionSize); sub.resize( nPartitions ); @@ -1005,9 +1028,9 @@ static const int g_zScanToY[1 << ( g_maxRtGridSize << 1 )] = 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, - 6, 6, 7, 7, 6, 5, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, - 6, 6, 7, 7, 6, 5, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, }; static const int g_rsScanToZ[1 << ( g_maxRtGridSize << 1 )] = { @@ -1025,12 +1048,8 @@ Partitioning PartitionerImpl::getMaxTuTiling( const UnitArea &cuArea, const Codi { static_assert( MAX_LOG2_DIFF_CU_TR_SIZE <= g_maxRtGridSize, "Z-scan tables are only provided for MAX_LOG2_DIFF_CU_TR_SIZE for up to 3 (8x8 tiling)!" ); - const CompArea area = cuArea.Y().valid() ? cuArea.Y() : cuArea.Cb(); -#if MAX_TB_SIZE_SIGNALLING - const int maxTrSize = cs.sps->getMaxTbSize() >> ( isLuma( area.compID ) ? 0 : 1 ); -#else - const int maxTrSize = MAX_TB_SIZEY >> ( isLuma( area.compID ) ? 0 : 1 ); -#endif + const Size area = cuArea.lumaSize(); + const int maxTrSize = (area.width>64 || area.height>64) ? 64 : cs.sps->getMaxTbSize(); const int numTilesH = std::max<int>( 1, area.width / maxTrSize ); const int numTilesV = std::max<int>( 1, area.height / maxTrSize ); const int numTiles = numTilesH * numTilesV; diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h index 4fbe68f312f0629ac25932646445a26e74cc0a09..590947759772b858bdbf5e3a047afbd3021aca77 100644 --- a/source/Lib/CommonLib/UnitPartitioner.h +++ b/source/Lib/CommonLib/UnitPartitioner.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -94,6 +94,7 @@ struct PartLevel bool canQtSplit; bool qgEnable; bool qgChromaEnable; + int modeType; PartLevel(); PartLevel( const PartSplit _split, const Partitioning& _parts ); @@ -123,6 +124,8 @@ public: unsigned currImplicitBtDepth; ChannelType chType; + TreeType treeType; + ModeType modeType; virtual ~Partitioner () { } @@ -134,6 +137,7 @@ public: const bool currQgChromaEnable () const { return currPartLevel().qgChromaEnable; } SplitSeries getSplitSeries () const; + ModeTypeSeries getModeTypeSeries () const; virtual void initCtu ( const UnitArea& ctuArea, const ChannelType _chType, const Slice& slice ) = 0; virtual void splitCurrArea ( const PartSplit split, const CodingStructure &cs ) = 0; @@ -150,6 +154,9 @@ public: virtual bool canSplit ( const PartSplit split, const CodingStructure &cs ) = 0; virtual bool isSplitImplicit ( const PartSplit split, const CodingStructure &cs ) = 0; virtual PartSplit getImplicitSplit ( const CodingStructure &cs ) = 0; + bool isSepTree ( const CodingStructure &cs ); + bool isConsInter () { return modeType == MODE_TYPE_INTER; } + bool isConsIntra () { return modeType == MODE_TYPE_INTRA; } }; class AdaptiveDepthPartitioner : public Partitioner @@ -190,6 +197,8 @@ public: #if _DEBUG m_currArea = _initialState.currArea(); #endif + treeType = _initialState.treeType; + modeType = _initialState.modeType; } void initCtu (const UnitArea& ctuArea, const ChannelType chType, const Slice& slice) {}; // not needed @@ -203,14 +212,6 @@ public: PartSplit getImplicitSplit (const CodingStructure &cs) { return CU_DONT_SPLIT; }; //not needed }; - - - -namespace PartitionerFactory -{ - Partitioner* get( const Slice& slice ); -}; - ////////////////////////////////////////////////////////////////////////// // Partitioner namespace - contains methods calculating the actual splits ////////////////////////////////////////////////////////////////////////// diff --git 
a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 534ec8017a984c5543336823cc3209d91520d3f8..b11d68861538736236a89972506bbaba0f5aa877 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,12 +58,12 @@ uint64_t CS::getEstBits(const CodingStructure &cs) bool CS::isDualITree( const CodingStructure &cs ) { - return cs.slice->isIRAP() && !cs.pcv->ISingleTree; + return cs.slice->isIntra() && !cs.pcv->ISingleTree; } UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const ChannelType chType ) { - return isDualITree( cs ) ? area.singleChan( chType ) : area; + return isDualITree( cs ) || cs.treeType != TREE_D ? area.singleChan( chType ) : area; } void CS::setRefinedMotionField(CodingStructure &cs) { @@ -87,6 +87,8 @@ void CS::setRefinedMotionField(CodingStructure &cs) subPu.mv[1] = pu.mv[1]; subPu.mv[REF_PIC_LIST_0] += pu.mvdL0SubPu[num]; subPu.mv[REF_PIC_LIST_1] -= pu.mvdL0SubPu[num]; + subPu.mv[REF_PIC_LIST_0].clipToStorageBitDepth(); + subPu.mv[REF_PIC_LIST_1].clipToStorageBitDepth(); pu.mvdL0SubPu[num].setZero(); num++; PU::spanMotionInfo(subPu); @@ -98,6 +100,36 @@ void CS::setRefinedMotionField(CodingStructure &cs) } // CU tools +bool CU::getRprScaling( const SPS* sps, const PPS* curPPS, Picture* refPic, int& xScale, int& yScale ) +{ + const Window& curScalingWindow = curPPS->getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int curPicWidth = curPPS->getPicWidthInLumaSamples() - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset()); + int curPicHeight = curPPS->getPicHeightInLumaSamples() - 
SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowTopOffset() + curScalingWindow.getWindowBottomOffset()); +#else + int curPicWidth = curPPS->getPicWidthInLumaSamples() - curScalingWindow.getWindowLeftOffset() - curScalingWindow.getWindowRightOffset(); + int curPicHeight = curPPS->getPicHeightInLumaSamples() - curScalingWindow.getWindowTopOffset() - curScalingWindow.getWindowBottomOffset(); +#endif + + const Window& refScalingWindow = refPic->getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int refPicWidth = refPic->getPicWidthInLumaSamples() - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowLeftOffset() + refScalingWindow.getWindowRightOffset()); + int refPicHeight = refPic->getPicHeightInLumaSamples() - SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowTopOffset() + refScalingWindow.getWindowBottomOffset()); +#else + int refPicWidth = refPic->getPicWidthInLumaSamples() - refScalingWindow.getWindowLeftOffset() - refScalingWindow.getWindowRightOffset(); + int refPicHeight = refPic->getPicHeightInLumaSamples() - refScalingWindow.getWindowTopOffset() - refScalingWindow.getWindowBottomOffset(); +#endif + + xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth; + yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight; + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + return refPic->isRefScaled( curPPS ); +#else + return refPicWidth != curPicWidth || refPicHeight != curPicHeight; +#endif +} + bool CU::isIntra(const CodingUnit &cu) { return cu.predMode == MODE_INTRA; @@ -113,36 +145,36 @@ bool CU::isIBC(const CodingUnit &cu) return cu.predMode == MODE_IBC; } -bool CU::isRDPCMEnabled(const CodingUnit& cu) +bool CU::isPLT(const CodingUnit &cu) { - return cu.cs->sps->getSpsRangeExtension().getRdpcmEnabledFlag(cu.predMode == MODE_INTRA ? 
RDPCM_SIGNAL_IMPLICIT : RDPCM_SIGNAL_EXPLICIT); + return cu.predMode == MODE_PLT; } -bool CU::isLosslessCoded(const CodingUnit &cu) +bool CU::isRDPCMEnabled(const CodingUnit& cu) { - return cu.cs->pps->getTransquantBypassEnabledFlag() && cu.transQuantBypass; + return cu.cs->sps->getSpsRangeExtension().getRdpcmEnabledFlag(cu.predMode == MODE_INTRA ? RDPCM_SIGNAL_IMPLICIT : RDPCM_SIGNAL_EXPLICIT); } + bool CU::isSameSlice(const CodingUnit& cu, const CodingUnit& cu2) { return cu.slice->getIndependentSliceIdx() == cu2.slice->getIndependentSliceIdx(); } -#if HEVC_TILES_WPP bool CU::isSameTile(const CodingUnit& cu, const CodingUnit& cu2) { return cu.tileIdx == cu2.tileIdx; } + bool CU::isSameSliceAndTile(const CodingUnit& cu, const CodingUnit& cu2) { return ( cu.slice->getIndependentSliceIdx() == cu2.slice->getIndependentSliceIdx() ) && ( cu.tileIdx == cu2.tileIdx ); } -#endif bool CU::isSameCtu(const CodingUnit& cu, const CodingUnit& cu2) { - uint32_t ctuSizeBit = g_aucLog2[cu.cs->sps->getMaxCUWidth()]; + uint32_t ctuSizeBit = floorLog2(cu.cs->sps->getMaxCUWidth()); Position pos1Ctu(cu.lumaPos().x >> ctuSizeBit, cu.lumaPos().y >> ctuSizeBit); Position pos2Ctu(cu2.lumaPos().x >> ctuSizeBit, cu2.lumaPos().y >> ctuSizeBit); @@ -150,30 +182,13 @@ bool CU::isSameCtu(const CodingUnit& cu, const CodingUnit& cu2) return pos1Ctu.x == pos2Ctu.x && pos1Ctu.y == pos2Ctu.y; } -uint32_t CU::getIntraSizeIdx(const CodingUnit &cu) -{ - uint8_t uiWidth = cu.lumaSize().width; - - uint32_t uiCnt = 0; - - while (uiWidth) - { - uiCnt++; - uiWidth >>= 1; - } - - uiCnt -= 2; - - return uiCnt > 6 ? 6 : uiCnt; -} - bool CU::isLastSubCUOfCtu( const CodingUnit &cu ) { - const SPS &sps = *cu.cs->sps; - const Area cuAreaY = CS::isDualITree( *cu.cs ) ? 
Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) ) : ( const Area& ) cu.Y(); + const Area cuAreaY = cu.isSepTree() ? Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) ) : (const Area&)cu.Y(); - return ( ( ( ( cuAreaY.x + cuAreaY.width ) & cu.cs->pcv->maxCUWidthMask ) == 0 || cuAreaY.x + cuAreaY.width == sps.getPicWidthInLumaSamples() ) && - ( ( ( cuAreaY.y + cuAreaY.height ) & cu.cs->pcv->maxCUHeightMask ) == 0 || cuAreaY.y + cuAreaY.height == sps.getPicHeightInLumaSamples() ) ); + + return ( ( ( ( cuAreaY.x + cuAreaY.width ) & cu.cs->pcv->maxCUWidthMask ) == 0 || cuAreaY.x + cuAreaY.width == cu.cs->pps->getPicWidthInLumaSamples() ) && + ( ( ( cuAreaY.y + cuAreaY.height ) & cu.cs->pcv->maxCUHeightMask ) == 0 || cuAreaY.y + cuAreaY.height == cu.cs->pps->getPicHeightInLumaSamples() ) ); } uint32_t CU::getCtuAddr( const CodingUnit &cu ) @@ -185,7 +200,15 @@ int CU::predictQP( const CodingUnit& cu, const int prevQP ) { const CodingStructure &cs = *cu.cs; - if ( !cu.blocks[cu.chType].x && !( cu.blocks[cu.chType].y & ( cs.pcv->maxCUHeightMask >> getChannelTypeScaleY( cu.chType, cu.chromaFormat ) ) ) && ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType) != NULL ) ) + uint32_t ctuRsAddr = getCtuAddr( cu ); + uint32_t ctuXPosInCtus = ctuRsAddr % cs.pcv->widthInCtus; + uint32_t tileColIdx = cu.slice->getPPS()->ctuToTileCol( ctuXPosInCtus ); + uint32_t tileXPosInCtus = cu.slice->getPPS()->getTileColumnBd( tileColIdx ); + if( ctuXPosInCtus == tileXPosInCtus && + !( cu.blocks[cu.chType].x & ( cs.pcv->maxCUWidthMask >> getChannelTypeScaleX( cu.chType, cu.chromaFormat ) ) ) && + !( cu.blocks[cu.chType].y & ( cs.pcv->maxCUHeightMask >> getChannelTypeScaleY( cu.chType, cu.chromaFormat ) ) ) && 
+ ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType) != NULL ) && + CU::isSameSliceAndTile( *cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType), cu ) ) { return ( ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType ) )->qp ); } @@ -217,6 +240,19 @@ void CU::addPUs( CodingUnit& cu ) cu.cs->addPU( CS::getArea( *cu.cs, cu, cu.chType ), cu.chType ); } +void CU::saveMotionInHMVP( const CodingUnit& cu, const bool isToBeDone ) +{ + const PredictionUnit& pu = *cu.firstPU; + + if (!cu.triangle && !cu.affine && !isToBeDone ) + { + MotionInfo mi = pu.getMotionInfo(); + + mi.BcwIdx = (mi.interDir == 3) ? cu.BcwIdx : BCW_DEFAULT; + + cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi); + } +} PartSplit CU::getSplitAtDepth( const CodingUnit& cu, const unsigned depth ) { @@ -235,31 +271,14 @@ PartSplit CU::getSplitAtDepth( const CodingUnit& cu, const unsigned depth ) else { THROW( "Unknown split mode" ); return CU_QUAD_SPLIT; } } -bool CU::hasNonTsCodedBlock( const CodingUnit& cu ) +ModeType CU::getModeTypeAtDepth( const CodingUnit& cu, const unsigned depth ) { - bool hasAnyNonTSCoded = false; - - for( auto &currTU : traverseTUs( cu ) ) - { - for( uint32_t i = 0; i < ::getNumberValidTBlocks( *cu.cs->pcv ); i++ ) - { - hasAnyNonTSCoded |= ( currTU.blocks[i].valid() && ( isLuma(ComponentID(i)) ? 
currTU.mtsIdx != 1 : true ) && TU::getCbf( currTU, ComponentID( i ) ) ); - } - } - - return hasAnyNonTSCoded; + ModeType modeType = ModeType( (cu.modeTypeSeries >> (depth * 3)) & 0x07 ); + CHECK( depth > cu.depth, " depth is wrong" ); + return modeType; } -uint32_t CU::getNumNonZeroCoeffNonTs( const CodingUnit& cu ) -{ - uint32_t count = 0; - for( auto &currTU : traverseTUs( cu ) ) - { - count += TU::getNumNonZeroCoeffsNonTS( currTU ); - } - return count; -} bool CU::divideTuInRows( const CodingUnit &cu ) { @@ -267,75 +286,6 @@ bool CU::divideTuInRows( const CodingUnit &cu ) return cu.ispMode == HOR_INTRA_SUBPARTITIONS ? true : false; } -bool CU::firstTestISPHorSplit( const int width, const int height, const ComponentID compID, const CodingUnit *cuLeft, const CodingUnit *cuAbove ) -{ - //this function decides which split mode (horizontal or vertical) is tested first (encoder only) - //we check the logarithmic aspect ratios of the block - int aspectRatio = g_aucLog2[width] - g_aucLog2[height]; - if( aspectRatio > 0 ) - { - return true; - } - else if( aspectRatio < 0 ) - { - return false; - } - else //if (aspectRatio == 0) - { - //we gather data from the neighboring CUs - const int cuLeftWidth = cuLeft != nullptr ? cuLeft->blocks[compID].width : -1; - const int cuLeftHeight = cuLeft != nullptr ? cuLeft->blocks[compID].height : -1; - const int cuAboveWidth = cuAbove != nullptr ? cuAbove->blocks[compID].width : -1; - const int cuAboveHeight = cuAbove != nullptr ? cuAbove->blocks[compID].height : -1; - const int cuLeft1dSplit = cuLeft != nullptr && cuLeft->predMode == MODE_INTRA ? cuLeft->ispMode : 0; - const int cuAbove1dSplit = cuAbove != nullptr && cuAbove->predMode == MODE_INTRA ? cuAbove->ispMode : 0; - if( cuLeftWidth != -1 && cuAboveWidth == -1 ) - { - int cuLeftAspectRatio = g_aucLog2[cuLeftWidth] - g_aucLog2[cuLeftHeight]; - return cuLeftAspectRatio < 0 ? false : cuLeftAspectRatio > 0 ? true : cuLeft1dSplit == VER_INTRA_SUBPARTITIONS ? 
false : true; - } - else if( cuLeftWidth == -1 && cuAboveWidth != -1 ) - { - int cuAboveAspectRatio = g_aucLog2[cuAboveWidth] - g_aucLog2[cuAboveHeight]; - return cuAboveAspectRatio < 0 ? false : cuAboveAspectRatio > 0 ? true : cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true; - } - else if( cuLeftWidth != -1 && cuAboveWidth != -1 ) - { - int cuLeftAspectRatio = g_aucLog2[cuLeftWidth] - g_aucLog2[cuLeftHeight]; - int cuAboveAspectRatio = g_aucLog2[cuAboveWidth] - g_aucLog2[cuAboveHeight]; - if( cuLeftAspectRatio < 0 && cuAboveAspectRatio < 0 ) - { - return false; - } - else if( cuLeftAspectRatio > 0 && cuAboveAspectRatio > 0 ) - { - return true; - } - else if( cuLeftAspectRatio == 0 && cuAboveAspectRatio == 0 ) - { - if( cuLeft1dSplit != 0 && cuAbove1dSplit != 0 ) - { - return cuLeft1dSplit == VER_INTRA_SUBPARTITIONS && cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true; - } - else if( cuLeft1dSplit != 0 && cuAbove1dSplit == 0 ) - { - return cuLeft1dSplit == VER_INTRA_SUBPARTITIONS ? false : true; - } - else if( cuLeft1dSplit == 0 && cuAbove1dSplit != 0 ) - { - return cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true; - } - return true; - } - else - { - return cuLeftAspectRatio > cuAboveAspectRatio ? 
cuLeftAspectRatio > 0 : cuAboveAspectRatio > 0; - } - //return true; - } - return true; - } -} PartSplit CU::getISPType( const CodingUnit &cu, const ComponentID compID ) { @@ -370,43 +320,45 @@ bool CU::isISPFirst( const CodingUnit &cu, const CompArea &tuArea, const Compone return tuArea == cu.firstTU->blocks[compID]; } -ISPType CU::canUseISPSplit( const CodingUnit &cu, const ComponentID compID ) +bool CU::canUseISP( const CodingUnit &cu, const ComponentID compID ) { const int width = cu.blocks[compID].width; const int height = cu.blocks[compID].height; -#if MAX_TB_SIZE_SIGNALLING const int maxTrSize = cu.cs->sps->getMaxTbSize(); -#else - const int maxTrSize = MAX_TB_SIZEY; -#endif - return CU::canUseISPSplit( width, height, maxTrSize ); + return CU::canUseISP( width, height, maxTrSize ); } -ISPType CU::canUseISPSplit( const int width, const int height, const int maxTrSize ) +bool CU::canUseISP( const int width, const int height, const int maxTrSize ) { - bool widthCannotBeUsed = false, heightCannotBeUsed = false; - - const uint32_t minTuSizeForISP = MIN_TB_SIZEY; - bool notEnoughSamplesToSplit = ( g_aucLog2[width] + g_aucLog2[height] <= ( g_aucLog2[minTuSizeForISP] << 1 ) ); - widthCannotBeUsed = width > maxTrSize || notEnoughSamplesToSplit; - heightCannotBeUsed = height > maxTrSize || notEnoughSamplesToSplit; - - if( !widthCannotBeUsed && !heightCannotBeUsed ) - { - return CAN_USE_VER_AND_HORL_SPLITS; //both splits can be used - } - else if( widthCannotBeUsed && !heightCannotBeUsed ) + bool notEnoughSamplesToSplit = ( floorLog2(width) + floorLog2(height) <= ( floorLog2(MIN_TB_SIZEY) << 1 ) ); + bool cuSizeLargerThanMaxTrSize = width > maxTrSize || height > maxTrSize; + if ( notEnoughSamplesToSplit || cuSizeLargerThanMaxTrSize ) { - return VER_INTRA_SUBPARTITIONS; //only the vertical split can be performed + return false; } - else if( !widthCannotBeUsed && heightCannotBeUsed ) + return true; +} + +bool CU::canUseLfnstWithISP( const CompArea& cuArea, const ISPType 
ispSplitType ) +{ + if( ispSplitType == NOT_INTRA_SUBPARTITIONS ) { - return HOR_INTRA_SUBPARTITIONS; //only the horizontal split can be performed + return false; } - else + Size tuSize = ( ispSplitType == HOR_INTRA_SUBPARTITIONS ) ? Size( cuArea.width, CU::getISPSplitDim( cuArea.width, cuArea.height, TU_1D_HORZ_SPLIT ) ) : + Size( CU::getISPSplitDim( cuArea.width, cuArea.height, TU_1D_VERT_SPLIT ), cuArea.height ); + + if( !( tuSize.width >= MIN_TB_SIZEY && tuSize.height >= MIN_TB_SIZEY ) ) { - return NOT_INTRA_SUBPARTITIONS; //neither of the splits can be used + return false; } + return true; +} + +bool CU::canUseLfnstWithISP( const CodingUnit& cu, const ChannelType chType ) +{ + CHECK( !isLuma( chType ), "Wrong ISP mode!" ); + return CU::canUseLfnstWithISP( cu.blocks[chType == CHANNEL_TYPE_LUMA ? 0 : 1], (ISPType)cu.ispMode ); } uint32_t CU::getISPSplitDim( const int width, const int height, const PartSplit ispType ) @@ -425,14 +377,36 @@ uint32_t CU::getISPSplitDim( const int width, const int height, const PartSplit nonSplitDimensionSize = height; } - const int minNumberOfSamplesPerCu = 1 << ( ( g_aucLog2[MIN_TB_SIZEY] << 1 ) ); - const int factorToMinSamples = nonSplitDimensionSize < minNumberOfSamplesPerCu ? minNumberOfSamplesPerCu >> g_aucLog2[nonSplitDimensionSize] : 1; + const int minNumberOfSamplesPerCu = 1 << ( ( floorLog2(MIN_TB_SIZEY) << 1 ) ); + const int factorToMinSamples = nonSplitDimensionSize < minNumberOfSamplesPerCu ? minNumberOfSamplesPerCu >> floorLog2(nonSplitDimensionSize) : 1; partitionSize = ( splitDimensionSize >> divShift ) < factorToMinSamples ? factorToMinSamples : ( splitDimensionSize >> divShift ); - CHECK( g_aucLog2[partitionSize] + g_aucLog2[nonSplitDimensionSize] < g_aucLog2[minNumberOfSamplesPerCu], "A partition has less than the minimum amount of samples!" ); + CHECK( floorLog2(partitionSize) + floorLog2(nonSplitDimensionSize) < floorLog2(minNumberOfSamplesPerCu), "A partition has less than the minimum amount of samples!" 
); return partitionSize; } +bool CU::allLumaCBFsAreZero(const CodingUnit& cu) +{ + if (!cu.ispMode) + { + return TU::getCbf(*cu.firstTU, COMPONENT_Y) == false; + } + else + { + int numTotalTUs = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth()); + TransformUnit* tuPtr = cu.firstTU; + for (int tuIdx = 0; tuIdx < numTotalTUs; tuIdx++) + { + if (TU::getCbf(*tuPtr, COMPONENT_Y) == true) + { + return false; + } + tuPtr = tuPtr->next; + } + return true; + } +} + PUTraverser CU::traversePUs( CodingUnit& cu ) { @@ -459,10 +433,8 @@ cTUTraverser CU::traverseTUs( const CodingUnit& cu ) int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType &channelType /*= CHANNEL_TYPE_LUMA*/ ) { const int numMPMs = NUM_MOST_PROBABLE_MODES; - const int extendRefLine = (channelType == CHANNEL_TYPE_LUMA) ? pu.multiRefIdx : 0; - const ISPType ispType = isLuma( channelType ) ? ISPType( pu.cu->ispMode ) : NOT_INTRA_SUBPARTITIONS; - const bool isHorSplit = ispType == HOR_INTRA_SUBPARTITIONS; { + CHECK(channelType != CHANNEL_TYPE_LUMA, "Not harmonized yet"); int numCand = -1; int leftIntraDir = PLANAR_IDX, aboveIntraDir = PLANAR_IDX; @@ -474,14 +446,14 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType const PredictionUnit *puLeft = pu.cs->getPURestricted(posLB.offset(-1, 0), pu, channelType); if (puLeft && CU::isIntra(*puLeft->cu)) { - leftIntraDir = puLeft->intraDir[channelType]; + leftIntraDir = PU::getIntraDirLuma( *puLeft ); } // Get intra direction of above PU const PredictionUnit *puAbove = pu.cs->getPURestricted(posRT.offset(0, -1), pu, channelType); if (puAbove && CU::isIntra(*puAbove->cu) && CU::isSameCtu(*pu.cu, *puAbove->cu)) { - aboveIntraDir = puAbove->intraDir[channelType]; + aboveIntraDir = PU::getIntraDirLuma( *puAbove ); } CHECK(2 >= numMPMs, "Invalid number of most probable modes"); @@ -489,185 +461,9 @@ int PU::getIntraMPMs( const 
PredictionUnit &pu, unsigned* mpm, const ChannelType const int offset = (int)NUM_LUMA_MODE - 6; const int mod = offset + 3; - if (extendRefLine) - { - int modeIdx = 0; - int angularMode[2] = { 0, 0 }; - - if (leftIntraDir > DC_IDX) - { - angularMode[modeIdx++] = leftIntraDir; - } - if (aboveIntraDir > DC_IDX && aboveIntraDir != leftIntraDir) - { - angularMode[modeIdx++] = aboveIntraDir; - } - if (modeIdx == 0) - { - mpm[0] = VER_IDX; - mpm[1] = HOR_IDX; - mpm[2] = 2; - mpm[3] = DIA_IDX; - mpm[4] = VDIA_IDX; - mpm[5] = 26; - } - else if (modeIdx == 1) - { - mpm[0] = angularMode[0]; - mpm[1] = ((angularMode[0] + offset) % mod) + 2; - mpm[2] = ((angularMode[0] - 1) % mod) + 2; - mpm[3] = ((angularMode[0] + offset - 1) % mod) + 2; - mpm[4] = (angularMode[0] % mod) + 2; - mpm[5] = ((angularMode[0] + offset - 2) % mod) + 2; - } - else - { - mpm[0] = angularMode[0]; - mpm[1] = angularMode[1]; - int maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1; - int minCandModeIdx = 1 - maxCandModeIdx; - if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1) - { - mpm[2] = ((angularMode[minCandModeIdx] + offset) % mod) + 2; - mpm[3] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2; - mpm[4] = ((angularMode[minCandModeIdx] + offset - 1) % mod) + 2; - mpm[5] = ( angularMode[maxCandModeIdx] % mod) + 2; - } - else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62) - { - mpm[2] = ((angularMode[minCandModeIdx] - 1) % mod) + 2; - mpm[3] = ((angularMode[maxCandModeIdx] + offset) % mod) + 2; - mpm[4] = ((angularMode[minCandModeIdx]) % mod) + 2; - mpm[5] = ((angularMode[maxCandModeIdx] + offset - 1) % mod) + 2; - } - else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2) - { - mpm[2] = ((angularMode[minCandModeIdx] - 1) % mod) + 2; - mpm[3] = ((angularMode[minCandModeIdx] + offset) % mod) + 2; - mpm[4] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2; - mpm[5] = ((angularMode[minCandModeIdx] + offset - 1) % mod) + 2; - } - else - { - mpm[2] = ((angularMode[minCandModeIdx] + offset) % mod) + 2; - mpm[3] 
= ((angularMode[minCandModeIdx] - 1) % mod) + 2; - mpm[4] = ((angularMode[maxCandModeIdx] + offset) % mod) + 2; - mpm[5] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2; - } - } - } - else if( ispType != NOT_INTRA_SUBPARTITIONS ) { - //default case mpm[0] = PLANAR_IDX; - if( isHorSplit ) - { - mpm[1] = HOR_IDX; - mpm[2] = 25; - mpm[3] = 10; - mpm[4] = 65; - mpm[5] = VER_IDX; - } - else - { - mpm[1] = VER_IDX; - mpm[2] = 43; - mpm[3] = 60; - mpm[4] = 3; - mpm[5] = HOR_IDX; - } - int canonicalMode = mpm[1]; - if( leftIntraDir == aboveIntraDir ) //L=A - { - numCand = 1; - if( leftIntraDir > DC_IDX ) - { - mpm[0] = leftIntraDir; - mpm[1] = ( ( leftIntraDir + offset ) % mod ) + 2; - mpm[2] = ( ( leftIntraDir - 1 ) % mod ) + 2; - if( ( isHorSplit && leftIntraDir < DIA_IDX ) || ( !isHorSplit && leftIntraDir >= DIA_IDX ) ) - { - mpm[3] = ( ( leftIntraDir + offset - 1 ) % mod ) + 2; - mpm[4] = ( leftIntraDir % mod ) + 2; - mpm[5] = ( ( leftIntraDir + offset - 2 ) % mod ) + 2;; - } - else - { - if( isHorSplit ) - { - mpm[3] = HOR_IDX; - mpm[4] = 5; - } - else - { - mpm[3] = VER_IDX; - mpm[4] = VDIA_IDX - 3; - } - mpm[5] = PLANAR_IDX; - } - } - } - else //L!=A - { - numCand = 2; - if( ( leftIntraDir > DC_IDX ) && ( aboveIntraDir > DC_IDX ) ) - { - int distLeftToCanonicalMode = abs( leftIntraDir - canonicalMode ); - int distAboveToCanonicalMode = abs( aboveIntraDir - canonicalMode ); - mpm[0] = aboveIntraDir; - mpm[1] = leftIntraDir; - if( distLeftToCanonicalMode <= distAboveToCanonicalMode ) - { - mpm[0] = leftIntraDir; - mpm[1] = aboveIntraDir; - } - int maxCandModeIdx = mpm[0] > mpm[1] ? 
0 : 1; - int minCandModeIdx = 1 - maxCandModeIdx; - if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1 ) - { - mpm[2] = ( ( mpm[minCandModeIdx] + offset ) % mod ) + 2; - mpm[3] = ( ( mpm[maxCandModeIdx] - 1 ) % mod ) + 2; - mpm[4] = ( ( mpm[minCandModeIdx] + offset - 1 ) % mod ) + 2; - mpm[5] = ( mpm[maxCandModeIdx] % mod ) + 2; - } - else if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62 ) - { - mpm[2] = ( ( mpm[minCandModeIdx] - 1 ) % mod ) + 2; - mpm[3] = ( ( mpm[maxCandModeIdx] + offset ) % mod ) + 2; - mpm[4] = ( ( mpm[minCandModeIdx] ) % mod ) + 2; - mpm[5] = ( ( mpm[maxCandModeIdx] + offset - 1 ) % mod ) + 2; - } - else if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2 ) - { - mpm[2] = ( ( mpm[minCandModeIdx] - 1 ) % mod ) + 2; - mpm[3] = ( ( mpm[minCandModeIdx] + offset ) % mod ) + 2; - mpm[4] = ( ( mpm[maxCandModeIdx] - 1 ) % mod ) + 2; - mpm[5] = ( ( mpm[minCandModeIdx] + offset - 1 ) % mod ) + 2; - } - else - { - mpm[2] = ( ( mpm[minCandModeIdx] + offset ) % mod ) + 2; - mpm[3] = ( ( mpm[minCandModeIdx] - 1 ) % mod ) + 2; - mpm[4] = ( ( mpm[maxCandModeIdx] + offset ) % mod ) + 2; - mpm[5] = ( ( mpm[maxCandModeIdx] - 1 ) % mod ) + 2; - } - } - else if( leftIntraDir + aboveIntraDir > 2 ) - { - //mpm[0] = PLANAR_IDX; - int angMode = leftIntraDir > DC_IDX ? leftIntraDir : aboveIntraDir; - mpm[1] = angMode; - mpm[2] = ( ( angMode + offset ) % mod ) + 2; - mpm[3] = ( ( angMode - 1 ) % mod ) + 2; - mpm[4] = ( ( angMode + offset - 1 ) % mod ) + 2; - mpm[5] = ( ( angMode ) % mod ) + 2; - } - } - } - else - { - mpm[0] = leftIntraDir; - mpm[1] = (mpm[0] == PLANAR_IDX) ? 
DC_IDX : PLANAR_IDX; + mpm[1] = DC_IDX; mpm[2] = VER_IDX; mpm[3] = HOR_IDX; mpm[4] = VER_IDX - 4; @@ -678,42 +474,60 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType numCand = 1; if (leftIntraDir > DC_IDX) { - mpm[0] = leftIntraDir; - mpm[1] = PLANAR_IDX; - mpm[2] = DC_IDX; - mpm[3] = ((leftIntraDir + offset) % mod) + 2; - mpm[4] = ((leftIntraDir - 1) % mod) + 2; - mpm[5] = ((leftIntraDir + offset - 1) % mod) + 2; + mpm[0] = PLANAR_IDX; + mpm[1] = leftIntraDir; + mpm[2] = ((leftIntraDir + offset) % mod) + 2; + mpm[3] = ((leftIntraDir - 1) % mod) + 2; + mpm[4] = ((leftIntraDir + offset - 1) % mod) + 2; + mpm[5] = ( leftIntraDir % mod) + 2; } } else //L!=A { numCand = 2; - mpm[0] = leftIntraDir; - mpm[1] = aboveIntraDir; - bool maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1; + int maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1; if ((leftIntraDir > DC_IDX) && (aboveIntraDir > DC_IDX)) { - mpm[2] = PLANAR_IDX; - mpm[3] = DC_IDX; - if ((mpm[maxCandModeIdx] - mpm[!maxCandModeIdx] < 63) && (mpm[maxCandModeIdx] - mpm[!maxCandModeIdx] > 1)) + mpm[0] = PLANAR_IDX; + mpm[1] = leftIntraDir; + mpm[2] = aboveIntraDir; + maxCandModeIdx = mpm[1] > mpm[2] ? 1 : 2; + int minCandModeIdx = mpm[1] > mpm[2] ? 
2 : 1; + if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1) { + mpm[3] = ((mpm[minCandModeIdx] + offset) % mod) + 2; + mpm[4] = ((mpm[maxCandModeIdx] - 1) % mod) + 2; + mpm[5] = ((mpm[minCandModeIdx] + offset - 1) % mod) + 2; + } + else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62) + { + mpm[3] = ((mpm[minCandModeIdx] - 1) % mod) + 2; mpm[4] = ((mpm[maxCandModeIdx] + offset) % mod) + 2; - mpm[5] = ((mpm[maxCandModeIdx] - 1) % mod) + 2; + mpm[5] = ( mpm[minCandModeIdx] % mod) + 2; + } + else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2) + { + mpm[3] = ((mpm[minCandModeIdx] - 1) % mod) + 2; + mpm[4] = ((mpm[minCandModeIdx] + offset) % mod) + 2; + mpm[5] = ((mpm[maxCandModeIdx] - 1) % mod) + 2; } else { - mpm[4] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2; - mpm[5] = ((mpm[maxCandModeIdx]) % mod) + 2; + mpm[3] = ((mpm[minCandModeIdx] + offset) % mod) + 2; + mpm[4] = ((mpm[minCandModeIdx] - 1) % mod) + 2; + mpm[5] = ((mpm[maxCandModeIdx] + offset) % mod) + 2; } } else if (leftIntraDir + aboveIntraDir >= 2) { - mpm[2] = (mpm[!maxCandModeIdx] == PLANAR_IDX) ? DC_IDX : PLANAR_IDX; - mpm[3] = ((mpm[maxCandModeIdx] + offset) % mod) + 2; - mpm[4] = ((mpm[maxCandModeIdx] - 1) % mod) + 2; - mpm[5] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2; + mpm[0] = PLANAR_IDX; + mpm[1] = (leftIntraDir < aboveIntraDir) ? 
aboveIntraDir : leftIntraDir; + maxCandModeIdx = 1; + mpm[2] = ((mpm[maxCandModeIdx] + offset) % mod) + 2; + mpm[3] = ((mpm[maxCandModeIdx] - 1) % mod) + 2; + mpm[4] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2; + mpm[5] = ( mpm[maxCandModeIdx] % mod) + 2; } } } @@ -726,6 +540,24 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType } } +bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType) +{ + return (chType == CHANNEL_TYPE_LUMA && pu.cu->mipFlag); +} + + +uint32_t PU::getIntraDirLuma( const PredictionUnit &pu ) +{ + if (isMIP(pu)) + { + return PLANAR_IDX; + } + else + { + return pu.intraDir[CHANNEL_TYPE_LUMA]; + } +} + void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE] ) { @@ -739,10 +571,7 @@ void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NU modeList[6] = MDLM_T_IDX; modeList[7] = DM_CHROMA_IDX; - Position topLeftPos = pu.blocks[pu.chType].lumaPos(); - Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 ); - const PredictionUnit *lumaPU = CS::isDualITree( *pu.cs ) ? 
pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : &pu; - const uint32_t lumaMode = lumaPU->intraDir[CHANNEL_TYPE_LUMA]; + const uint32_t lumaMode = getCoLocatedIntraLumaMode(pu); for( int i = 0; i < 4; i++ ) { if( lumaMode == modeList[i] ) @@ -754,180 +583,33 @@ void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NU } } - bool PU::isLMCMode(unsigned mode) { return (mode >= LM_CHROMA_IDX && mode <= MDLM_T_IDX); } + bool PU::isLMCModeEnabled(const PredictionUnit &pu, unsigned mode) { - if ( pu.cs->sps->getUseLMChroma() ) + if ( pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed() ) { return true; } return false; } -int PU::getLMSymbolList(const PredictionUnit &pu, int *pModeList) +int PU::getLMSymbolList(const PredictionUnit &pu, int *modeList) { - int iIdx = 0; + int idx = 0; - pModeList[ iIdx++ ] = LM_CHROMA_IDX; - pModeList[ iIdx++ ] = -1; - pModeList[iIdx++] = MDLM_L_IDX; - pModeList[iIdx++] = MDLM_T_IDX; - return iIdx; + modeList[idx++] = LM_CHROMA_IDX; + modeList[idx++] = MDLM_L_IDX; + modeList[idx++] = MDLM_T_IDX; + return idx; } - - bool PU::isChromaIntraModeCrossCheckMode( const PredictionUnit &pu ) { - return pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX; -} - -int PU::getMHIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType &channelType /*= CHANNEL_TYPE_LUMA*/, const bool isChromaMDMS /*= false*/, const unsigned startIdx /*= 0*/) -{ - const int numMPMs = 3; // Multi-hypothesis intra uses only 3 MPM - { - int numCand = -1; - uint32_t leftIntraDir = DC_IDX, aboveIntraDir = DC_IDX; - - const CompArea& area = pu.block(getFirstComponentOfChannel(channelType)); - const Position& pos = area.pos(); - - // Get intra direction of left PU - const PredictionUnit *puLeft = pu.cs->getPURestricted(pos.offset(-1, 0), pu, channelType); - - if (puLeft && (CU::isIntra(*puLeft->cu) || puLeft->mhIntraFlag)) - { - leftIntraDir = puLeft->intraDir[channelType]; - - if (isChroma(channelType) && leftIntraDir == 
DM_CHROMA_IDX) - { - leftIntraDir = puLeft->intraDir[0]; - } - } - - // Get intra direction of above PU - const PredictionUnit* puAbove = pu.cs->getPURestricted(pos.offset(0, -1), pu, channelType); - - if (puAbove && (CU::isIntra(*puAbove->cu) || puAbove->mhIntraFlag) && CU::isSameCtu(*pu.cu, *puAbove->cu)) - { - aboveIntraDir = puAbove->intraDir[channelType]; - - if (isChroma(channelType) && aboveIntraDir == DM_CHROMA_IDX) - { - aboveIntraDir = puAbove->intraDir[0]; - } - } - - CHECK(2 >= numMPMs, "Invalid number of most probable modes"); - - uint32_t leftIntraDir2 = leftIntraDir; - uint32_t aboveIntraDir2 = aboveIntraDir; - - leftIntraDir2 = (leftIntraDir2 > DC_IDX) ? ((leftIntraDir2 <= DIA_IDX) ? HOR_IDX : VER_IDX) : leftIntraDir2; - aboveIntraDir2 = (aboveIntraDir2 > DC_IDX) ? ((aboveIntraDir2 <= DIA_IDX) ? HOR_IDX : VER_IDX) : aboveIntraDir2; - - if (leftIntraDir2 == aboveIntraDir2) - { - numCand = 1; - - if (leftIntraDir2 > DC_IDX) // angular modes - { - mpm[0] = leftIntraDir2; - mpm[1] = PLANAR_IDX; - mpm[2] = DC_IDX; - } - else //non-angular - { - mpm[0] = PLANAR_IDX; - mpm[1] = DC_IDX; - mpm[2] = VER_IDX; - } - } - else - { - numCand = 2; - - mpm[0] = leftIntraDir2; - mpm[1] = aboveIntraDir2; - - if (leftIntraDir2 && aboveIntraDir2) //both modes are non-planar - { - mpm[2] = PLANAR_IDX; - } - else - { - mpm[2] = (leftIntraDir2 + aboveIntraDir2) < 2 ? 
VER_IDX : DC_IDX; - } - } - int narrowCase = getNarrowShape(pu.lwidth(), pu.lheight()); - if (narrowCase > 0) - { - bool isMPM[NUM_LUMA_MODE]; - for (int idx = 0; idx < NUM_LUMA_MODE; idx++) - { - isMPM[idx] = false; - } - for (int idx = 0; idx < numMPMs; idx++) - { - isMPM[mpm[idx]] = true; - } - if (narrowCase == 1 && isMPM[HOR_IDX]) - { - for (int idx = 0; idx < numMPMs; idx++) - { - if (mpm[idx] == HOR_IDX) - { - if (!isMPM[PLANAR_IDX]) - mpm[idx] = PLANAR_IDX; - else if (!isMPM[DC_IDX]) - mpm[idx] = DC_IDX; - else if (!isMPM[VER_IDX]) - mpm[idx] = VER_IDX; - break; - } - } - } - if (narrowCase == 2 && isMPM[VER_IDX]) - { - for (int idx = 0; idx < numMPMs; idx++) - { - if (mpm[idx] == VER_IDX) - { - if (!isMPM[PLANAR_IDX]) - mpm[idx] = PLANAR_IDX; - else if (!isMPM[DC_IDX]) - mpm[idx] = DC_IDX; - else if (!isMPM[HOR_IDX]) - mpm[idx] = HOR_IDX; - break; - } - } - } - } - CHECK(numCand == 0, "No candidates found"); - CHECK(mpm[0] == mpm[1] || mpm[0] == mpm[2] || mpm[2] == mpm[1], "redundant MPM"); - return numCand; - } -} -int PU::getNarrowShape(const int width, const int height) -{ - int longSide = (width > height) ? width : height; - int shortSide = (width > height) ? height : width; - if (longSide > (2 * shortSide)) - { - if (longSide == width) - return 1; - else - return 2; - } - else - { - return 0; - } + return !pu.cu->bdpcmModeChroma && pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX; } uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chType ) @@ -936,111 +618,100 @@ uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chT if( uiIntraMode == DM_CHROMA_IDX && !isLuma( chType ) ) { - Position topLeftPos = pu.blocks[pu.chType].lumaPos(); - Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 ); - const PredictionUnit &lumaPU = CS::isDualITree( *pu.cs ) ? 
*pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : *pu.cs->getPU( topLeftPos, CHANNEL_TYPE_LUMA ); - - uiIntraMode = lumaPU.intraDir[0]; + uiIntraMode = getCoLocatedIntraLumaMode(pu); } - if( pu.chromaFormat == CHROMA_422 && !isLuma( chType ) ) + if( pu.chromaFormat == CHROMA_422 && !isLuma( chType ) && uiIntraMode < NUM_LUMA_MODE ) // map directional, planar and dc { uiIntraMode = g_chroma422IntraAngleMappingTable[uiIntraMode]; } return uiIntraMode; } -bool PU::xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const MergeCtx mergeCandList, bool hasPruned[MRG_MAX_NUM_CANDS]) +uint32_t PU::getCoLocatedIntraLumaMode( const PredictionUnit &pu ) +{ + Position topLeftPos = pu.blocks[pu.chType].lumaPos(); + Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 ); + const PredictionUnit &lumaPU = pu.cu->isSepTree() ? *pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : *pu.cs->getPU( topLeftPos, CHANNEL_TYPE_LUMA ); + + return PU::getIntraDirLuma( lumaPU ); +} + +int PU::getWideAngIntraMode( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ) { - for (uint32_t ui = 0; ui < prevCnt; ui++) + if( dirMode < 2 ) { - if (hasPruned[ui]) - { - continue; - } - if (mergeCandList.interDirNeighbours[ui] == mergeCandList.interDirNeighbours[mergeCandIndex]) - { - if (mergeCandList.interDirNeighbours[ui] == 3) - { - int offset0 = (ui * 2); - int offset1 = (mergeCandIndex * 2); - if (mergeCandList.mvFieldNeighbours[offset0].refIdx == mergeCandList.mvFieldNeighbours[offset1].refIdx && - mergeCandList.mvFieldNeighbours[offset0 + 1].refIdx == mergeCandList.mvFieldNeighbours[offset1 + 1].refIdx && - mergeCandList.mvFieldNeighbours[offset0].mv == mergeCandList.mvFieldNeighbours[offset1].mv && - mergeCandList.mvFieldNeighbours[offset0 + 1].mv == mergeCandList.mvFieldNeighbours[offset1 + 1].mv - ) - { - hasPruned[ui] = true; - return true; - } - } - else - { - int offset0 = 
(ui * 2) + mergeCandList.interDirNeighbours[ui] - 1; - int offset1 = (mergeCandIndex * 2) + mergeCandList.interDirNeighbours[ui] - 1; - if (mergeCandList.mvFieldNeighbours[offset0].refIdx == mergeCandList.mvFieldNeighbours[offset1].refIdx && - mergeCandList.mvFieldNeighbours[offset0].mv == mergeCandList.mvFieldNeighbours[offset1].mv - ) - { - hasPruned[ui] = true; - return true; - } - } - } + return ( int ) dirMode; } - return false; -} + CodingStructure& cs = *tu.cs; + const CompArea& area = tu.blocks[ compID ]; + PelBuf pred = cs.getPredBuf( area ); + int width = int( pred.width ); + int height = int( pred.height ); + int modeShift[ ] = { 0, 6, 10, 12, 14, 15 }; + int deltaSize = abs( floorLog2( width ) - floorLog2( height ) ); + int predMode = dirMode; -#if JVET_L0090_PAIR_AVG + if( width > height && dirMode < 2 + modeShift[ deltaSize ] ) + { + predMode += ( VDIA_IDX - 1 ); + } + else if( height > width && predMode > VDIA_IDX - modeShift[ deltaSize ] ) + { + predMode -= ( VDIA_IDX + 1 ); + } -bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos - , bool ibcFlag - , bool isShared -) -#else + return predMode; +} -bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool isCandInter[MRG_MAX_NUM_CANDS], bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos - , int mmvdList -) -#endif + +bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt + , const bool isAvailableA1, const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove + , const bool ibcFlag + , const bool isGt4x4 + ) { const Slice& slice = *cs.slice; MotionInfo miNeighbor; - bool hasPruned[MRG_MAX_NUM_CANDS]; - memset(hasPruned, 
0, MRG_MAX_NUM_CANDS * sizeof(bool)); - if (isAvailableSubPu) - { - hasPruned[subPuMvpPos] = true; - } - auto &lut = ibcFlag ? ( isShared ? cs.motionLut.lutShareIbc : cs.motionLut.lutIbc ) : ( isShared ? cs.motionLut.lutShare : cs.motionLut.lut ); - int num_avai_candInLUT = (int) lut.size(); + + auto &lut = ibcFlag ? cs.motionLut.lutIbc : cs.motionLut.lut; + int num_avai_candInLUT = (int)lut.size(); for (int mrgIdx = 1; mrgIdx <= num_avai_candInLUT; mrgIdx++) { miNeighbor = lut[num_avai_candInLUT - mrgIdx]; - mrgCtx.interDirNeighbours[cnt] = miNeighbor.interDir; - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]); - if (slice.isInterB()) - { - mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miNeighbor.mv[1], miNeighbor.refIdx[1]); - } - if (mrgIdx > 2 || !xCheckSimilarMotion(cnt, prevCnt, mrgCtx, hasPruned)) + + if ( mrgIdx > 2 || ((mrgIdx > 1 || !isGt4x4) && ibcFlag) + || ((!isAvailableA1 || (miLeft != miNeighbor)) && (!isAvailableB1 || (miAbove != miNeighbor))) ) { -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? miNeighbor.GBiIdx : GBI_DEFAULT; - if (mrgCandIdx == cnt && canFastExit) + mrgCtx.interDirNeighbours[cnt] = miNeighbor.interDir; + mrgCtx.useAltHpelIf [cnt] = !ibcFlag && miNeighbor.useAltHpelIf; + mrgCtx.BcwIdx [cnt] = (miNeighbor.interDir == 3) ? 
miNeighbor.BcwIdx : BCW_DEFAULT; + + mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]); + if (slice.isInterB()) + { + mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miNeighbor.mv[1], miNeighbor.refIdx[1]); + } + + if (mrgCandIdx == cnt) { return true; } cnt ++; + if (cnt == maxNumMergeCandMin1) { break; } } } + + if (cnt < maxNumMergeCandMin1) + { + mrgCtx.useAltHpelIf[cnt] = false; + } + return false; } @@ -1048,16 +719,16 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const { const CodingStructure &cs = *pu.cs; const Slice &slice = *pu.cs->slice; - const uint32_t maxNumMergeCand = slice.getMaxNumMergeCand(); - const bool canFastExit = pu.cs->pps->getLog2ParallelMergeLevelMinus2() == 0; + const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumIBCMergeCand(); for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui) { - mrgCtx.GBiIdx[ui] = GBI_DEFAULT; + mrgCtx.BcwIdx[ui] = BCW_DEFAULT; mrgCtx.interDirNeighbours[ui] = 0; mrgCtx.mrgTypeNeighbours[ui] = MRG_TYPE_IBC; mrgCtx.mvFieldNeighbours[ui * 2].refIdx = NOT_VALID; mrgCtx.mvFieldNeighbours[ui * 2 + 1].refIdx = NOT_VALID; + mrgCtx.useAltHpelIf[ui] = false; } mrgCtx.numValidMergeCand = maxNumMergeCand; @@ -1065,16 +736,16 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const int cnt = 0; - const Position posLT = pu.shareParentPos; - const Position posRT = pu.shareParentPos.offset(pu.shareParentSize.width - 1, 0); - const Position posLB = pu.shareParentPos.offset(0, pu.shareParentSize.height - 1); + const Position posRT = pu.Y().topRight(); + const Position posLB = pu.Y().bottomLeft(); MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft; //left const PredictionUnit* puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); + bool isGt4x4 = pu.lwidth() * pu.lheight() > 16; const bool isAvailableA1 = puLeft && isDiffMER(pu, *puLeft) && pu.cu != puLeft->cu && CU::isIBC(*puLeft->cu); - if 
(isAvailableA1) + if (isGt4x4 && isAvailableA1) { miLeft = puLeft->getMotionInfo(posLB.offset(-1, 0)); @@ -1082,7 +753,7 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; // get Mv from Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1095,11 +766,10 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const return; } - // above const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType); bool isAvailableB1 = puAbove && isDiffMER(pu, *puAbove) && pu.cu != puAbove->cu && CU::isIBC(*puAbove->cu); - if (isAvailableB1) + if (isGt4x4 && isAvailableB1) { miAbove = puAbove->getMotionInfo(posRT.offset(0, -1)); @@ -1109,72 +779,7 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; // get Mv from Above mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]); - if (mrgCandIdx == cnt && canFastExit) - { - return; - } - - cnt++; - } - } - - // early termination - if (cnt == maxNumMergeCand) - { - return; - } - - int spatialCandPos = cnt; - - // above right - const PredictionUnit *puAboveRight = cs.getPURestricted(posRT.offset(1, -1), pu, pu.chType); - bool isAvailableB0 = puAboveRight && isDiffMER(pu, *puAboveRight) && CU::isIBC(*puAboveRight->cu); - if (isAvailableB0) - { - miAboveRight = puAboveRight->getMotionInfo(posRT.offset(1, -1)); - -#if HM_JEM_MERGE_CANDS - if ((!isAvailableB1 || (miAbove != miAboveRight)) && (!isAvailableA1 || (miLeft != miAboveRight))) -#else - if (!isAvailableB1 || (miAbove != miAboveRight)) -#endif - { - // get Inter Dir - mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; - // get Mv from Above-right - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAboveRight.mv[0], 
miAboveRight.refIdx[0]); - - if (mrgCandIdx == cnt && canFastExit) - { - return; - } - - cnt++; - } - } - // early termination - if (cnt == maxNumMergeCand) - { - return; - } - - //left bottom - const PredictionUnit *puLeftBottom = cs.getPURestricted(posLB.offset(-1, 1), pu, pu.chType); - bool isAvailableA0 = puLeftBottom && isDiffMER(pu, *puLeftBottom) && CU::isIBC(*puLeftBottom->cu); - if (isAvailableA0) - { - miBelowLeft = puLeftBottom->getMotionInfo(posLB.offset(-1, 1)); - -#if HM_JEM_MERGE_CANDS - if ((!isAvailableA1 || (miBelowLeft != miLeft)) && (!isAvailableB1 || (miBelowLeft != miAbove)) && (!isAvailableB0 || (miBelowLeft != miAboveRight))) -#else - if (!isAvailableA1 || (miBelowLeft != miLeft)) -#endif - { - // get Inter Dir - mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir; - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miBelowLeft.mv[0], miBelowLeft.refIdx[0]); - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1182,104 +787,39 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const cnt++; } } - // early termination - if (cnt == maxNumMergeCand) - { - return; - } - - // above left - if (cnt < 4) - { - const PredictionUnit *puAboveLeft = cs.getPURestricted(posLT.offset(-1, -1), pu, pu.chType); - bool isAvailableB2 = puAboveLeft && isDiffMER(pu, *puAboveLeft) && CU::isIBC(*puAboveLeft->cu); - if (isAvailableB2) - { - miAboveLeft = puAboveLeft->getMotionInfo(posLT.offset(-1, -1)); - -#if HM_JEM_MERGE_CANDS - if ((!isAvailableA1 || (miLeft != miAboveLeft)) && (!isAvailableB1 || (miAbove != miAboveLeft)) && (!isAvailableA0 || (miBelowLeft != miAboveLeft)) && (!isAvailableB0 || (miAboveRight != miAboveLeft))) -#else - if ((!isAvailableA1 || (miLeft != miAboveLeft)) && (!isAvailableB1 || (miAbove != miAboveLeft))) -#endif - { - // get Inter Dir - mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir; - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAboveLeft.mv[0], miAboveLeft.refIdx[0]); - if 
(mrgCandIdx == cnt && canFastExit) - { - return; - } - cnt++; - } - } - } // early termination if (cnt == maxNumMergeCand) { return; } - int maxNumMergeCandMin1 = maxNumMergeCand - 1; - if (cnt != maxNumMergeCandMin1) + if (cnt != maxNumMergeCand) { - bool isAvailableSubPu = false; - unsigned subPuMvpPos = 0; - - bool isShared = ((pu.Y().lumaSize().width != pu.shareParentSize.width) || (pu.Y().lumaSize().height != pu.shareParentSize.height)); - -#if JVET_L0090_PAIR_AVG - bool bFound = addMergeHMVPCand(cs, mrgCtx, canFastExit - , mrgCandIdx - , maxNumMergeCandMin1, cnt - , spatialCandPos - , isAvailableSubPu, subPuMvpPos + bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCand, cnt + , isAvailableA1, miLeft, isAvailableB1, miAbove , true - , isShared - ); -#else - bool bFound = addMergeHMVPCand(slice, mrgCtx, isCandInter, canFastExit - , mrgCandIdx - , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos - ); -#endif + , isGt4x4 + ); + if (bFound) { return; } } -#if JVET_L0090_PAIR_AVG - // pairwise-average candidates - if (cnt>1 && cnt <maxNumMergeCand) + while (cnt < maxNumMergeCand) { - mrgCtx.mvFieldNeighbours[cnt * 2 ].setMvField(Mv(0, 0), NOT_VALID); - mrgCtx.mvFieldNeighbours[cnt * 2 + 1].setMvField(Mv(0, 0), NOT_VALID); - - const Mv& MvI = mrgCtx.mvFieldNeighbours[0 * 2].mv; - const Mv& MvJ = mrgCtx.mvFieldNeighbours[1 * 2].mv; - // average two MVs - Mv avgMv = MvI; - - avgMv += MvJ; - mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC; - roundAffineMv(avgMv.hor, avgMv.ver, 1); - avgMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); - mrgCtx.mvFieldNeighbours[cnt * 2 ].setMvField(avgMv, MAX_NUM_REF); + mrgCtx.mvFieldNeighbours[cnt * 2].setMvField(Mv(0, 0), MAX_NUM_REF); mrgCtx.interDirNeighbours[cnt] = 1; + if (mrgCandIdx == cnt) + { + return; + } cnt++; } - // early termination - if (cnt == maxNumMergeCand) - { - return; - } -#endif - mrgCtx.numValidMergeCand = cnt; - } void PU::getInterMergeCandidates( const PredictionUnit &pu, 
MergeCtx& mrgCtx, @@ -1288,24 +828,16 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { const CodingStructure &cs = *pu.cs; const Slice &slice = *pu.cs->slice; - const uint32_t maxNumMergeCand = slice.getMaxNumMergeCand(); - const bool canFastExit = pu.cs->pps->getLog2ParallelMergeLevelMinus2() == 0; - -#if !JVET_L0090_PAIR_AVG - // this variable is unused if remove HEVC combined candidates - bool isCandInter[MRG_MAX_NUM_CANDS]; -#endif + const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumMergeCand(); for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui) { -#if !JVET_L0090_PAIR_AVG - isCandInter[ui] = false; -#endif - mrgCtx.GBiIdx[ui] = GBI_DEFAULT; + mrgCtx.BcwIdx[ui] = BCW_DEFAULT; mrgCtx.interDirNeighbours[ui] = 0; mrgCtx.mrgTypeNeighbours [ui] = MRG_TYPE_DEFAULT_N; mrgCtx.mvFieldNeighbours[(ui << 1) ].refIdx = NOT_VALID; mrgCtx.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID; + mrgCtx.useAltHpelIf[ui] = false; } mrgCtx.numValidMergeCand = maxNumMergeCand; @@ -1313,36 +845,32 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int cnt = 0; - - const Position posLT = pu.shareParentPos; - const Position posRT = pu.shareParentPos.offset(pu.shareParentSize.width - 1, 0); - const Position posLB = pu.shareParentPos.offset(0, pu.shareParentSize.height - 1); + const Position posLT = pu.Y().topLeft(); + const Position posRT = pu.Y().topRight(); + const Position posLB = pu.Y().bottomLeft(); MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft; - //left - const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType ); + // above + const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType); - const bool isAvailableA1 = puLeft && isDiffMER( pu, *puLeft ) && pu.cu != puLeft->cu && CU::isInter( *puLeft->cu ); + bool isAvailableB1 = puAbove && isDiffMER(pu, *puAbove) && pu.cu != puAbove->cu && CU::isInter(*puAbove->cu); - if( 
isAvailableA1 ) + if (isAvailableB1) { - miLeft = puLeft->getMotionInfo( posLB.offset(-1, 0) ); - -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif + miAbove = puAbove->getMotionInfo(posRT.offset(0, -1)); // get Inter Dir - mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->GBiIdx : GBI_DEFAULT; - // get Mv from Left - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); + mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; + mrgCtx.useAltHpelIf[cnt] = miAbove.useAltHpelIf; + // get Mv from Above + mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->BcwIdx : BCW_DEFAULT; + mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]); if (slice.isInterB()) { - mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]); + mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miAbove.mv[1], miAbove.refIdx[1]); } - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1356,33 +884,29 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, return; } + //left + const PredictionUnit* puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); - // above - const PredictionUnit *puAbove = cs.getPURestricted( posRT.offset( 0, -1 ), pu, pu.chType ); - - bool isAvailableB1 = puAbove && isDiffMER( pu, *puAbove ) && pu.cu != puAbove->cu && CU::isInter( *puAbove->cu ); + const bool isAvailableA1 = puLeft && isDiffMER(pu, *puLeft) && pu.cu != puLeft->cu && CU::isInter(*puLeft->cu); - if( isAvailableB1 ) + if (isAvailableA1) { - miAbove = puAbove->getMotionInfo( posRT.offset( 0, -1 ) ); + miLeft = puLeft->getMotionInfo(posLB.offset(-1, 0)); - if( !isAvailableA1 || ( miAbove != miLeft ) ) + if (!isAvailableB1 || (miAbove != miLeft)) { -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif - // get Inter Dir - mrgCtx.interDirNeighbours[cnt] = 
miAbove.interDir; - // get Mv from Above - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->GBiIdx : GBI_DEFAULT; - mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] ); + mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; + mrgCtx.useAltHpelIf[cnt] = miLeft.useAltHpelIf; + mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->BcwIdx : BCW_DEFAULT; + // get Mv from Left + mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); - if( slice.isInterB() ) + if (slice.isInterB()) { - mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAbove.mv[1], miAbove.refIdx[1] ); + mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]); } - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1397,8 +921,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, return; } - int spatialCandPos = cnt; - // above right const PredictionUnit *puAboveRight = cs.getPURestricted( posRT.offset( 1, -1 ), pu, pu.chType ); @@ -1408,20 +930,14 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { miAboveRight = puAboveRight->getMotionInfo( posRT.offset( 1, -1 ) ); -#if HM_JEM_MERGE_CANDS - if( ( !isAvailableB1 || ( miAbove != miAboveRight ) ) && ( !isAvailableA1 || ( miLeft != miAboveRight ) ) ) -#else if( !isAvailableB1 || ( miAbove != miAboveRight ) ) -#endif { -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; + mrgCtx.useAltHpelIf[cnt] = miAboveRight.useAltHpelIf; // get Mv from Above-right - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveRight->cu->GBiIdx : GBI_DEFAULT; + mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puAboveRight->cu->BcwIdx : BCW_DEFAULT; mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] ); if( slice.isInterB() ) @@ -1429,7 +945,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveRight.mv[1], miAboveRight.refIdx[1] ); } - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1452,19 +968,13 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { miBelowLeft = puLeftBottom->getMotionInfo( posLB.offset( -1, 1 ) ); -#if HM_JEM_MERGE_CANDS - if( ( !isAvailableA1 || ( miBelowLeft != miLeft ) ) && ( !isAvailableB1 || ( miBelowLeft != miAbove ) ) && ( !isAvailableB0 || ( miBelowLeft != miAboveRight ) ) ) -#else if( !isAvailableA1 || ( miBelowLeft != miLeft ) ) -#endif { -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir; - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeftBottom->cu->GBiIdx : GBI_DEFAULT; + mrgCtx.useAltHpelIf[cnt] = miBelowLeft.useAltHpelIf; + mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puLeftBottom->cu->BcwIdx : BCW_DEFAULT; // get Mv from Bottom-Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] ); @@ -1473,7 +983,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miBelowLeft.mv[1], miBelowLeft.refIdx[1] ); } - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1499,19 +1009,13 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, { miAboveLeft = puAboveLeft->getMotionInfo( posLT.offset( -1, -1 ) ); -#if HM_JEM_MERGE_CANDS - if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) && ( !isAvailableA0 || ( miBelowLeft != miAboveLeft ) ) && ( !isAvailableB0 || ( miAboveRight != miAboveLeft ) ) ) -#else if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) ) -#endif { -#if !JVET_L0090_PAIR_AVG - isCandInter[cnt] = true; -#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir; - mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveLeft->cu->GBiIdx : GBI_DEFAULT; + mrgCtx.useAltHpelIf[cnt] = miAboveLeft.useAltHpelIf; + mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? 
puAboveLeft->cu->BcwIdx : BCW_DEFAULT; // get Mv from Above-Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveLeft.mv[0], miAboveLeft.refIdx[0] ); @@ -1520,7 +1024,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveLeft.mv[1], miAboveLeft.refIdx[1] ); } - if (mrgCandIdx == cnt && canFastExit) + if (mrgCandIdx == cnt) { return; } @@ -1535,45 +1039,23 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, return; } - if (slice.getEnableTMVPFlag()) + if (slice.getPicHeader()->getEnableTMVPFlag() && (pu.lumaSize().width + pu.lumaSize().height > 12)) { //>> MTK colocated-RightBottom // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to - Position posRB = pu.shareParentPos.offset(pu.shareParentSize.width-3, pu.shareParentSize.height - 3); + Position posRB = pu.Y().bottomRight().offset( -3, -3 ); const PreCalcValues& pcv = *cs.pcv; Position posC0; - Position posC1 = pu.shareParentPos.offset((pu.shareParentSize.width/2), (pu.shareParentSize.height/2)); - + Position posC1 = pu.Y().center(); bool C0Avail = false; - bool C1Avail = (posC1.x < pcv.lumaWidth) && (posC1.y < pcv.lumaHeight); - if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight)) { + int posYInCtu = posRB.y & pcv.maxCUHeightMask; + if (posYInCtu + 4 < pcv.maxCUHeight) { - Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask ); - - if( ( posInCtu.x + 4 < pcv.maxCUWidth ) && // is not at the last column of CTU - ( posInCtu.y + 4 < pcv.maxCUHeight ) ) // is not at the last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - C0Avail = true; - } - else if( posInCtu.x + 4 < pcv.maxCUWidth ) // is not at the last column of CTU But is last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - // in the reference the CTU address is not set - thus probably resulting in no using this C0 
possibility - } - else if( posInCtu.y + 4 < pcv.maxCUHeight ) // is not at the last row of CTU But is last column of CTU - { - posC0 = posRB.offset( 4, 4 ); - C0Avail = true; - } - else //is the right bottom corner of CTU - { - posC0 = posRB.offset( 4, 4 ); - // same as for last column but not last row - } + posC0 = posRB.offset(4, 4); + C0Avail = true; } } @@ -1581,9 +1063,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int iRefIdx = 0; int dir = 0; unsigned uiArrayAddr = cnt; - bool bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, cColMv, iRefIdx ) ) - || ( C1Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, iRefIdx )); - + bool bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, cColMv, iRefIdx, false ) ) + || getColocatedMVP( pu, REF_PIC_LIST_0, posC1, cColMv, iRefIdx, false ); if (bExistMV) { dir |= 1; @@ -1592,8 +1073,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, if (slice.isInterB()) { - bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, cColMv, iRefIdx ) ) - || (C1Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx ) ); + bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, cColMv, iRefIdx, false ) ) + || getColocatedMVP( pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx, false ); if (bExistMV) { dir |= 2; @@ -1604,26 +1085,12 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, if( dir != 0 ) { bool addTMvp = true; -#if HM_JEM_MERGE_CANDS - int iSpanCand = cnt; - for( int i = 0; i < iSpanCand; i++ ) - { - if( mrgCtx.interDirNeighbours[ i ] == dir && - mrgCtx.mvFieldNeighbours [ i << 1 ] == mrgCtx.mvFieldNeighbours[ uiArrayAddr << 1 ] && - mrgCtx.mvFieldNeighbours [( i << 1 ) + 1] == mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 ) + 1] ) - { - addTMvp = false; - } - } -#endif if( addTMvp ) { mrgCtx.interDirNeighbours[uiArrayAddr] = dir; -#if !JVET_L0090_PAIR_AVG - isCandInter 
[uiArrayAddr] = true; -#endif - mrgCtx.GBiIdx[uiArrayAddr] = GBI_DEFAULT; - if (mrgCandIdx == cnt && canFastExit) + mrgCtx.BcwIdx[uiArrayAddr] = BCW_DEFAULT; + mrgCtx.useAltHpelIf[uiArrayAddr] = false; + if (mrgCandIdx == cnt) { return; } @@ -1642,32 +1109,19 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int maxNumMergeCandMin1 = maxNumMergeCand - 1; if (cnt != maxNumMergeCandMin1) { - bool isAvailableSubPu = false; - unsigned subPuMvpPos = 0; -#if JVET_L0090_PAIR_AVG - bool isShared = ((pu.Y().lumaSize().width != pu.shareParentSize.width) || (pu.Y().lumaSize().height != pu.shareParentSize.height)); - bool bFound = addMergeHMVPCand(cs, mrgCtx, canFastExit - , mrgCandIdx - , maxNumMergeCandMin1, cnt - , spatialCandPos - , isAvailableSubPu, subPuMvpPos + bool isGt4x4 = true; + bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCandMin1, cnt + , isAvailableA1, miLeft, isAvailableB1, miAbove , CU::isIBC(*pu.cu) - , isShared - ); -#else - bool bFound = addMergeHMVPCand(slice, mrgCtx, isCandInter, canFastExit - , (mmvdList != 0 && mrgCandIdx != -1) ? (const int)mrgCandIdxIBC : mrgCandIdx - , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos - , mmvdList - ); -#endif + , isGt4x4 + ); + if (bFound) { return; } } -#if JVET_L0090_PAIR_AVG // pairwise-average candidates { @@ -1680,6 +1134,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, unsigned char interDir = 0; + mrgCtx.useAltHpelIf[cnt] = (mrgCtx.useAltHpelIf[0] == mrgCtx.useAltHpelIf[1]) ? mrgCtx.useAltHpelIf[0] : false; for( int refListId = 0; refListId < (slice.isInterB() ? 
2 : 1); refListId++ ) { const short refIdxI = mrgCtx.mvFieldNeighbours[0 * 2 + refListId].refIdx; @@ -1731,53 +1186,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, return; } } -#endif uint32_t uiArrayAddr = cnt; -#if !JVET_L0090_PAIR_AVG - uint32_t uiCutoff = std::min( uiArrayAddr, 3u ); - if (slice.isInterB()) - { - static const uint32_t NUM_PRIORITY_LIST = 12; - static const uint32_t uiPriorityList0[NUM_PRIORITY_LIST] = { 0 , 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }; - static const uint32_t uiPriorityList1[NUM_PRIORITY_LIST] = { 1 , 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }; - - for (int idx = 0; idx < uiCutoff * (uiCutoff - 1) && uiArrayAddr != maxNumMergeCand; idx++) - { - CHECK( idx >= NUM_PRIORITY_LIST, "Invalid priority list number" ); - int i = uiPriorityList0[idx]; - int j = uiPriorityList1[idx]; - if (isCandInter[i] && isCandInter[j] && (mrgCtx.interDirNeighbours[i] & 0x1) && (mrgCtx.interDirNeighbours[j] & 0x2)) - { - isCandInter[uiArrayAddr] = true; - mrgCtx.interDirNeighbours[uiArrayAddr] = 3; - mrgCtx.GBiIdx[uiArrayAddr] = ((mrgCtx.interDirNeighbours[uiArrayAddr] == 3)) ? 
CU::deriveGbiIdx(mrgCtx.GBiIdx[i], mrgCtx.GBiIdx[j]) : GBI_DEFAULT; - - // get Mv from cand[i] and cand[j] - mrgCtx.mvFieldNeighbours[ uiArrayAddr << 1 ].setMvField(mrgCtx.mvFieldNeighbours[ i << 1 ].mv, mrgCtx.mvFieldNeighbours[ i << 1 ].refIdx); - mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1) + 1].setMvField(mrgCtx.mvFieldNeighbours[(j << 1) + 1].mv, mrgCtx.mvFieldNeighbours[(j << 1) + 1].refIdx); - - int iRefPOCL0 = slice.getRefPOC(REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1) ].refIdx); - int iRefPOCL1 = slice.getRefPOC(REF_PIC_LIST_1, mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1) + 1].refIdx); - - if( iRefPOCL0 == iRefPOCL1 && mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 )].mv == mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 ) + 1].mv ) - { - isCandInter[uiArrayAddr] = false; - } - else - { - uiArrayAddr++; - } - } - } - } - - // early termination - if (uiArrayAddr == maxNumMergeCand) - { - return; - } -#endif int iNumRefIdx = slice.isInterB() ? std::min(slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1)) : slice.getNumRefIdx(REF_PIC_LIST_0); @@ -1785,12 +1195,10 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, int refcnt = 0; while (uiArrayAddr < maxNumMergeCand) { -#if !JVET_L0090_PAIR_AVG - isCandInter [uiArrayAddr ] = true; -#endif mrgCtx.interDirNeighbours [uiArrayAddr ] = 1; - mrgCtx.GBiIdx [uiArrayAddr ] = GBI_DEFAULT; + mrgCtx.BcwIdx [uiArrayAddr ] = BCW_DEFAULT; mrgCtx.mvFieldNeighbours [uiArrayAddr << 1].setMvField(Mv(0, 0), r); + mrgCtx.useAltHpelIf[uiArrayAddr] = false; if (slice.isInterB()) { @@ -1817,18 +1225,35 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, } mrgCtx.numValidMergeCand = uiArrayAddr; } + bool PU::checkDMVRCondition(const PredictionUnit& pu) { - if (pu.cs->sps->getUseDMVR()) + WPScalingParam *wp0; + WPScalingParam *wp1; + int refIdx0 = pu.refIdx[REF_PIC_LIST_0]; + int refIdx1 = pu.refIdx[REF_PIC_LIST_1]; + 
pu.cu->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0); + pu.cu->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1); + if (pu.cs->sps->getUseDMVR() && (!pu.cs->picHeader->getDisDmvrFlag())) { return pu.mergeFlag && pu.mergeType == MRG_TYPE_DEFAULT_N + && !pu.ciipFlag && !pu.cu->affine && !pu.mmvdMergeFlag && !pu.cu->mmvdSkip && PU::isBiPredFromDifferentDirEqDistPoc(pu) && (pu.lheight() >= 8) - && ((pu.lheight() * pu.lwidth()) >= 64) + && (pu.lwidth() >= 8) + && ((pu.lheight() * pu.lwidth()) >= 128) + && (pu.cu->BcwIdx == BCW_DEFAULT) + && ((!wp0[COMPONENT_Y].bPresentFlag) && (!wp1[COMPONENT_Y].bPresentFlag)) +#if JVET_Q0487_SCALING_WINDOW_ISSUES + && ( refIdx0 < 0 ? true : (pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) == false) ) + && ( refIdx1 < 0 ? true : (pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) == false) ) +#else + && ( refIdx0 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? 
true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X ) +#endif ; } else @@ -1836,80 +1261,7 @@ bool PU::checkDMVRCondition(const PredictionUnit& pu) return false; } } -// for ibc pu validation -bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize) -{ - const int ctuSizeLog2 = g_aucLog2[ctuSize]; - - int refRightX = xPos + xBv + width - 1; - int refBottomY = yPos + yBv + height - 1; - - int refLeftX = xPos + xBv; - int refTopY = yPos + yBv; - - if ((xPos + xBv) < 0) - { - return false; - } - if (refRightX >= picWidth) - { - return false; - } - - if ((yPos + yBv) < 0) - { - return false; - } - if (refBottomY >= picHeight) - { - return false; - } - if ((xBv + width) > 0 && (yBv + height) > 0) - { - return false; - } - // cannot be in the above CTU row - if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2) - return false; - - // cannot be in the below CTU row - if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2) - { - return false; - } - - // in the same CTU line - if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2) - 1)) - { - - // in the same CTU, or left CTU - // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer - if ((refLeftX >> ctuSizeLog2) == ((xPos >> ctuSizeLog2) - 1)) - { - // ref block's collocated block in current CTU - const Position refPosCol = pu.Y().topLeft().offset(xBv + ctuSize, yBv); - int offset64x = (refPosCol.x >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); - int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); - const Position refPosCol64x64 = {offset64x, offset64y}; - if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y))) - return false; - } - } - else - return false; - - // in the same CTU, or valid area from left CTU. 
Check if the reference block is already coded - const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv); - const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv); - const ChannelType chType = toChannelType(COMPONENT_Y); - if (!pu.cs->isDecomp(refPosBR, chType)) - return false; - if (!pu.cs->isDecomp(refPosLT, chType)) - return false; - return true; - -}// for ibc pu validation static int xGetDistScaleFactor(const int &iCurrPOC, const int &iCurrRefPOC, const int &iColPOC, const int &iColRefPOC) { @@ -1999,6 +1351,7 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, mrgCtx.mmvdBaseMv[currBaseNum][0] = MvField(Mv(0, 0), -1); mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1]; } + mrgCtx.mmvdUseAltHpelIf[currBaseNum] = mrgCtx.useAltHpelIf[k]; currBaseNum++; @@ -2006,20 +1359,8 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, break; } } - - if (currBaseNum < MMVD_BASE_MV_NUM) - { - for (k = currBaseNum; k < MMVD_BASE_MV_NUM; k++) - { - mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0); - const Slice &slice = *pu.cs->slice; - mrgCtx.mmvdBaseMv[k][1] = MvField(Mv(0, 0), (slice.isInterB() ? 
0 : -1)); - mrgCtx.GBiIdx[k] = GBI_DEFAULT; - mrgCtx.interDirNeighbours[k] = (mrgCtx.mmvdBaseMv[k][0].refIdx >= 0) + (mrgCtx.mmvdBaseMv[k][1].refIdx >= 0) * 2; - } - } } -bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &_pos, Mv& rcMv, const int &refIdx ) +bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &_pos, Mv& rcMv, const int &refIdx, bool sbFlag) { // don't perform MV compression when generally disabled or subPuMvp is used const unsigned scale = 4 * std::max<int>(1, 4 * AMVP_DECIMATION_FACTOR / 4); @@ -2055,14 +1396,26 @@ bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList } int iColRefIdx = mi.refIdx[eColRefPicList]; - if (iColRefIdx < 0) + if (sbFlag && !slice.getCheckLDC()) + { + eColRefPicList = eRefPicList; + iColRefIdx = mi.refIdx[eColRefPicList]; + if (iColRefIdx < 0) + { + return false; + } + } + else { - eColRefPicList = RefPicList(1 - eColRefPicList); - iColRefIdx = mi.refIdx[eColRefPicList]; - if (iColRefIdx < 0) { - return false; + eColRefPicList = RefPicList(1 - eColRefPicList); + iColRefIdx = mi.refIdx[eColRefPicList]; + + if (iColRefIdx < 0) + { + return false; + } } } @@ -2127,90 +1480,124 @@ bool PU::isDiffMER(const PredictionUnit &pu1, const PredictionUnit &pu2) const unsigned xP = pu2.lumaPos().x; const unsigned yP = pu2.lumaPos().y; - unsigned plevel = pu1.cs->pps->getLog2ParallelMergeLevelMinus2() + 2; - - if ((xN >> plevel) != (xP >> plevel)) + if ((xN >> 2) != (xP >> 2)) { return true; } - if ((yN >> plevel) != (yP >> plevel)) + if ((yN >> 2) != (yP >> 2)) { return true; } return false; } -void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred) + +bool PU::isAddNeighborMv(const Mv& currMv, Mv* neighborMvs, int numNeighborMv) { + bool existed = false; + for (uint32_t cand = 0; cand < numNeighborMv && !existed; cand++) + { + if (currMv == neighborMvs[cand]) + { + existed = true; + } + } - //-- Get 
Spatial MV - Position posLT = pu.Y().topLeft(); - Position posRT = pu.Y().topRight(); - Position posLB = pu.Y().bottomLeft(); + if (!existed) + { + return true; + } + else + { + return false; + } +} - unsigned int left = 0, above = 0; +void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred) +{ + const PreCalcValues &pcv = *pu.cs->pcv; + const int cuWidth = pu.blocks[COMPONENT_Y].width; + const int cuHeight = pu.blocks[COMPONENT_Y].height; + const int log2UnitWidth = floorLog2(pcv.minCUWidth); + const int log2UnitHeight = floorLog2(pcv.minCUHeight); + const int totalAboveUnits = (cuWidth >> log2UnitWidth) + 1; + const int totalLeftUnits = (cuHeight >> log2UnitHeight) + 1; - //left - const PredictionUnit *neibLeftPU = NULL; - neibLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 0), pu, pu.cs->chType); - left = (neibLeftPU) ? CU::isIBC(*neibLeftPU->cu) : 0; + nbPred = 0; + Position posLT = pu.Y().topLeft(); - if (left) + // above-left + const PredictionUnit *aboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, CHANNEL_TYPE_LUMA); + if (aboveLeftPU && CU::isIBC(*aboveLeftPU->cu)) { - MvPred[nbPred++] = neibLeftPU->bv; - if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred])) - nbPred++; + if (isAddNeighborMv(aboveLeftPU->bv, mvPred, nbPred)) + { + mvPred[nbPred++] = aboveLeftPU->bv; + } } - //above - const PredictionUnit *neibAbovePU = NULL; - neibAbovePU = pu.cs->getPURestricted(posRT.offset(0, -1), pu, pu.cs->chType); - above = (neibAbovePU) ? 
CU::isIBC(*neibAbovePU->cu) : 0; - - if (above) + // above neighbors + for (uint32_t dx = 0; dx < totalAboveUnits && nbPred < IBC_NUM_CANDIDATES; dx++) { - MvPred[nbPred++] = neibAbovePU->bv; - if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred])) - nbPred++; + const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset((dx << log2UnitWidth), -1), pu, CHANNEL_TYPE_LUMA); + if (tmpPU && CU::isIBC(*tmpPU->cu)) + { + if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred)) + { + mvPred[nbPred++] = tmpPU->bv; + } + } } - // Below Left predictor search - const PredictionUnit *neibBelowLeftPU = NULL; - neibBelowLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 1), pu, pu.cs->chType); - unsigned int belowLeft = (neibBelowLeftPU) ? CU::isIBC(*neibBelowLeftPU->cu) : 0; - - if (belowLeft) + // left neighbors + for (uint32_t dy = 0; dy < totalLeftUnits && nbPred < IBC_NUM_CANDIDATES; dy++) { - MvPred[nbPred++] = neibBelowLeftPU->bv; - if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred])) - nbPred++; + const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset(-1, (dy << log2UnitHeight)), pu, CHANNEL_TYPE_LUMA); + if (tmpPU && CU::isIBC(*tmpPU->cu)) + { + if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred)) + { + mvPred[nbPred++] = tmpPU->bv; + } + } } - - // Above Right predictor search - const PredictionUnit *neibAboveRightPU = NULL; - neibAboveRightPU = pu.cs->getPURestricted(posRT.offset(1, -1), pu, pu.cs->chType); - unsigned int aboveRight = (neibAboveRightPU) ? 
CU::isIBC(*neibAboveRightPU->cu) : 0; - - if (aboveRight) + size_t numAvaiCandInLUT = pu.cs->motionLut.lutIbc.size(); + for (uint32_t cand = 0; cand < numAvaiCandInLUT && nbPred < IBC_NUM_CANDIDATES; cand++) { - MvPred[nbPred++] = neibAboveRightPU->bv; - if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred])) - nbPred++; + MotionInfo neibMi = pu.cs->motionLut.lutIbc[cand]; + if (isAddNeighborMv(neibMi.bv, mvPred, nbPred)) + { + mvPred[nbPred++] = neibMi.bv; + } } + bool isBvCandDerived[IBC_NUM_CANDIDATES]; + ::memset(isBvCandDerived, false, IBC_NUM_CANDIDATES); - // Above Left predictor search - const PredictionUnit *neibAboveLeftPU = NULL; - neibAboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, pu.cs->chType); - unsigned int aboveLeft = (neibAboveLeftPU) ? CU::isIBC(*neibAboveLeftPU->cu) : 0; - - if (aboveLeft) + int curNbPred = nbPred; + if (curNbPred < IBC_NUM_CANDIDATES) { - MvPred[nbPred++] = neibAboveLeftPU->bv; - if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred])) - nbPred++; + do + { + curNbPred = nbPred; + for (uint32_t idx = 0; idx < curNbPred && nbPred < IBC_NUM_CANDIDATES; idx++) + { + if (!isBvCandDerived[idx]) + { + Mv derivedBv; + if (getDerivedBV(pu, mvPred[idx], derivedBv)) + { + if (isAddNeighborMv(derivedBv, mvPred, nbPred)) + { + mvPred[nbPred++] = derivedBv; + } + } + isBvCandDerived[idx] = true; + } + } + } while (nbPred > curNbPred && nbPred < IBC_NUM_CANDIDATES); } } @@ -2224,13 +1611,13 @@ bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv) int offsetY = currentMv.getVer(); - if (rX < 0 || rY < 0 || rX >= pu.cs->slice->getSPS()->getPicWidthInLumaSamples() || rY >= pu.cs->slice->getSPS()->getPicHeightInLumaSamples()) + if( rX < 0 || rY < 0 || rX >= pu.cs->slice->getPPS()->getPicWidthInLumaSamples() || rY >= pu.cs->slice->getPPS()->getPicHeightInLumaSamples() ) { return false; } const PredictionUnit *neibRefPU = NULL; - neibRefPU = pu.cs->getPURestricted(pu.lumaPos().offset(offsetX, offsetY), pu, 
pu.cs->chType); + neibRefPU = pu.cs->getPURestricted(pu.lumaPos().offset(offsetX, offsetY), pu, CHANNEL_TYPE_LUMA); bool isIBC = (neibRefPU) ? CU::isIBC(*neibRefPU->cu) : 0; if (isIBC) @@ -2246,83 +1633,25 @@ bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv) */ void PU::fillIBCMvpCand(PredictionUnit &pu, AMVPInfo &amvpInfo) { - CodingStructure &cs = *pu.cs; AMVPInfo *pInfo = &amvpInfo; pInfo->numCand = 0; - //-- Get Spatial MV - Position posLT = pu.Y().topLeft(); - Position posRT = pu.Y().topRight(); - Position posLB = pu.Y().bottomLeft(); - - bool isScaledFlagLX = false; /// variable name from specification; true when the PUs below left or left are available (availableA0 || availableA1). - - const PredictionUnit* tmpPU = cs.getPURestricted(posLB.offset(-1, 1), pu, pu.chType); // getPUBelowLeft(idx, partIdxLB); - isScaledFlagLX = tmpPU != NULL && CU::isIBC(*tmpPU->cu); - if (!isScaledFlagLX) - { - tmpPU = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType); - isScaledFlagLX = tmpPU != NULL && CU::isIBC(*tmpPU->cu); - } - - // Left predictor search - if (isScaledFlagLX) - { - bool isAdded = addIBCMVPCand(pu, posLB, MD_BELOW_LEFT, *pInfo); - - if (!isAdded) - { - isAdded = addIBCMVPCand(pu, posLB, MD_LEFT, *pInfo); - } - } - - // Above predictor search - bool isAdded = addIBCMVPCand(pu, posRT, MD_ABOVE_RIGHT, *pInfo); - - if (!isAdded) - { - isAdded = addIBCMVPCand(pu, posRT, MD_ABOVE, *pInfo); - - if (!isAdded) - { - addIBCMVPCand(pu, posLT, MD_ABOVE_LEFT, *pInfo); - } - } - - for( int i = 0; i < pInfo->numCand; i++ ) - { - pInfo->mvCand[i].roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv); - } - - if (pInfo->numCand == 2) - { - if (pInfo->mvCand[0] == pInfo->mvCand[1]) - { - pInfo->numCand = 1; - } - } - - if (pInfo->numCand < AMVP_MAX_NUM_CANDS) - { - addAMVPHMVPCand(pu, REF_PIC_LIST_0, REF_PIC_LIST_1, cs.slice->getPOC(), *pInfo, pu.cu->imv); - } - - if (pInfo->numCand > AMVP_MAX_NUM_CANDS) - { - pInfo->numCand = 
AMVP_MAX_NUM_CANDS; - } + MergeCtx mergeCtx; + PU::getIBCMergeCandidates(pu, mergeCtx, AMVP_MAX_NUM_CANDS - 1); + int candIdx = 0; while (pInfo->numCand < AMVP_MAX_NUM_CANDS) { - pInfo->mvCand[pInfo->numCand] = Mv(0, 0); + pInfo->mvCand[pInfo->numCand] = mergeCtx.mvFieldNeighbours[(candIdx << 1) + 0].mv;; pInfo->numCand++; + candIdx++; } for (Mv &mv : pInfo->mvCand) { - mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + mv.roundIbcPrecInternal2Amvr(pu.cu->imv); } } @@ -2352,21 +1681,6 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in Position posRT = pu.Y().topRight(); Position posLB = pu.Y().bottomLeft(); - bool isScaledFlagLX = false; /// variable name from specification; true when the PUs below left or left are available (availableA0 || availableA1). - - { - const PredictionUnit* tmpPU = cs.getPURestricted( posLB.offset( -1, 1 ), pu, pu.chType ); // getPUBelowLeft(idx, partIdxLB); - isScaledFlagLX = tmpPU != NULL && CU::isInter( *tmpPU->cu ); - - if( !isScaledFlagLX ) - { - tmpPU = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType ); - isScaledFlagLX = tmpPU != NULL && CU::isInter( *tmpPU->cu ); - } - } - - // Left predictor search - if( isScaledFlagLX ) { bool bAdded = addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_BELOW_LEFT, *pInfo ); @@ -2374,15 +1688,6 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in { bAdded = addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_LEFT, *pInfo ); - if( !bAdded ) - { - bAdded = addMVPCandWithScaling( pu, eRefPicList, refIdx, posLB, MD_BELOW_LEFT, *pInfo ); - - if( !bAdded ) - { - addMVPCandWithScaling( pu, eRefPicList, refIdx, posLB, MD_LEFT, *pInfo ); - } - } } } @@ -2401,24 +1706,10 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in } } - if( !isScaledFlagLX ) - { - bool bAdded = addMVPCandWithScaling( pu, eRefPicList, refIdx, posRT, MD_ABOVE_RIGHT, *pInfo ); - - if( !bAdded ) - { - bAdded = 
addMVPCandWithScaling( pu, eRefPicList, refIdx, posRT, MD_ABOVE, *pInfo ); - - if( !bAdded ) - { - addMVPCandWithScaling( pu, eRefPicList, refIdx, posLT, MD_ABOVE_LEFT, *pInfo ); - } - } - } for( int i = 0; i < pInfo->numCand; i++ ) { - pInfo->mvCand[i].roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv); + pInfo->mvCand[i].roundTransPrecInternal2Amvr(pu.cu->imv); } if( pInfo->numCand == 2 ) @@ -2429,7 +1720,7 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in } } - if( cs.slice->getEnableTMVPFlag() && pInfo->numCand < AMVP_MAX_NUM_CANDS ) + if (cs.picHeader->getEnableTMVPFlag() && pInfo->numCand < AMVP_MAX_NUM_CANDS && (pu.lumaSize().width + pu.lumaSize().height > 12)) { // Get Temporal Motion Predictor const int refIdx_Col = refIdx; @@ -2441,49 +1732,28 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in Position posC0; bool C0Avail = false; Position posC1 = pu.Y().center(); - bool C1Avail = ( posC1.x < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ; - Mv cColMv; if( ( ( posRB.x + pcv.minCUWidth ) < pcv.lumaWidth ) && ( ( posRB.y + pcv.minCUHeight ) < pcv.lumaHeight ) ) { - Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask ); - - if ((posInCtu.x + 4 < pcv.maxCUWidth) && // is not at the last column of CTU - (posInCtu.y + 4 < pcv.maxCUHeight)) // is not at the last row of CTU - { - posC0 = posRB.offset(4, 4); - C0Avail = true; - } - else if (posInCtu.x + 4 < pcv.maxCUWidth) // is not at the last column of CTU But is last row of CTU - { - // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility - posC0 = posRB.offset(4, 4); - } - else if (posInCtu.y + 4 < pcv.maxCUHeight) // is not at the last row of CTU But is last column of CTU + int posYInCtu = posRB.y & pcv.maxCUHeightMask; + if (posYInCtu + 4 < pcv.maxCUHeight) { posC0 = posRB.offset(4, 4); C0Avail = true; } - else //is the right bottom corner of CTU - { - // 
same as for last column but not last row - posC0 = posRB.offset(4, 4); - } } - - if ((C0Avail && getColocatedMVP(pu, eRefPicList, posC0, cColMv, refIdx_Col)) || (C1Avail && getColocatedMVP(pu, eRefPicList, posC1, cColMv, refIdx_Col))) + if ( ( C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdx_Col, false ) ) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdx_Col, false ) ) { - cColMv.roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv); + cColMv.roundTransPrecInternal2Amvr(pu.cu->imv); pInfo->mvCand[pInfo->numCand++] = cColMv; } } if (pInfo->numCand < AMVP_MAX_NUM_CANDS) { - const int currRefPOC = cs.slice->getRefPic(eRefPicList, refIdx)->getPOC(); - const RefPicList eRefPicList2nd = (eRefPicList == REF_PIC_LIST_0) ? REF_PIC_LIST_1 : REF_PIC_LIST_0; - addAMVPHMVPCand(pu, eRefPicList, eRefPicList2nd, currRefPOC, *pInfo, pu.cu->imv); + const int currRefPOC = cs.slice->getRefPic(eRefPicList, refIdx)->getPOC(); + addAMVPHMVPCand(pu, eRefPicList, currRefPOC, *pInfo); } if (pInfo->numCand > AMVP_MAX_NUM_CANDS) @@ -2499,7 +1769,7 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in for (Mv &mv : pInfo->mvCand) { - mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + mv.roundTransPrecInternal2Amvr(pu.cu->imv); } } @@ -2556,28 +1826,13 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r } xInheritedAffineMv( pu, neibPU, eRefPicListIndex, outputAffineMv ); - if ( pu.cu->imv == 0 ) - { - outputAffineMv[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - outputAffineMv[1].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - else if ( pu.cu->imv == 2 ) - { - outputAffineMv[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - outputAffineMv[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } + outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv); + 
outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv); affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = outputAffineMv[0]; affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = outputAffineMv[1]; if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - if ( pu.cu->imv == 0 ) - { - outputAffineMv[2].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - else if ( pu.cu->imv == 2 ) - { - outputAffineMv[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } + outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv); affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = outputAffineMv[2]; } affiAMVPInfo.numCand++; @@ -2619,12 +1874,12 @@ void PU::xInheritedAffineMv( const PredictionUnit &pu, const PredictionUnit* puN int shift = MAX_CU_DEPTH; int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY; - iDMvHorX = (mvRT - mvLT).getHor() << (shift - g_aucLog2[neiW]); - iDMvHorY = (mvRT - mvLT).getVer() << (shift - g_aucLog2[neiW]); + iDMvHorX = (mvRT - mvLT).getHor() << (shift - floorLog2(neiW)); + iDMvHorY = (mvRT - mvLT).getVer() << (shift - floorLog2(neiW)); if ( puNeighbour->cu->affineType == AFFINEMODEL_6PARAM && !isTopCtuBoundary ) { - iDMvVerX = (mvLB - mvLT).getHor() << (shift - g_aucLog2[neiH]); - iDMvVerY = (mvLB - mvLT).getVer() << (shift - g_aucLog2[neiH]); + iDMvVerX = (mvLB - mvLT).getHor() << (shift - floorLog2(neiH)); + iDMvVerY = (mvLB - mvLT).getVer() << (shift - floorLog2(neiH)); } else { @@ -2700,12 +1955,9 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co { for (int i = 0; i < affiAMVPInfo.numCand; i++) { - if ( pu.cu->imv != 1 ) - { - affiAMVPInfo.mvCandLT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - affiAMVPInfo.mvCandRT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - affiAMVPInfo.mvCandLB[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } + affiAMVPInfo.mvCandLT[i].roundAffinePrecInternal2Amvr(pu.cu->imv); + 
affiAMVPInfo.mvCandRT[i].roundAffinePrecInternal2Amvr(pu.cu->imv); + affiAMVPInfo.mvCandLB[i].roundAffinePrecInternal2Amvr(pu.cu->imv); } return; } @@ -2757,18 +2009,9 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co outputAffineMv[1] = amvpInfo1.mvCand[0]; outputAffineMv[2] = amvpInfo2.mvCand[0]; - if ( pu.cu->imv == 0 ) - { - outputAffineMv[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - outputAffineMv[1].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - outputAffineMv[2].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - else if ( pu.cu->imv == 2 ) - { - outputAffineMv[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - outputAffineMv[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - outputAffineMv[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } + outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv); + outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv); + outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv); if ( cornerMVPattern == 7 || (cornerMVPattern == 3 && pu.cu->affineType == AFFINEMODEL_4PARAM) ) { @@ -2794,7 +2037,7 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co } // Get Temporal Motion Predictor - if ( affiAMVPInfo.numCand < 2 && pu.cs->slice->getEnableTMVPFlag() ) + if ( affiAMVPInfo.numCand < 2 && pu.cs->picHeader->getEnableTMVPFlag() ) { const int refIdxCol = refIdx; @@ -2805,46 +2048,19 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co Position posC0; bool C0Avail = false; Position posC1 = pu.Y().center(); - bool C1Avail = ( posC1.x < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ; - Mv cColMv; if ( ((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight) ) { - Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask ); - - if ( (posInCtu.x + 4 < pcv.maxCUWidth) 
&& // is not at the last column of CTU - (posInCtu.y + 4 < pcv.maxCUHeight) ) // is not at the last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - C0Avail = true; - } - else if ( posInCtu.x + 4 < pcv.maxCUWidth ) // is not at the last column of CTU But is last row of CTU - { - // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility - posC0 = posRB.offset( 4, 4 ); - } - else if ( posInCtu.y + 4 < pcv.maxCUHeight ) // is not at the last row of CTU But is last column of CTU + int posYInCtu = posRB.y & pcv.maxCUHeightMask; + if (posYInCtu + 4 < pcv.maxCUHeight) { - posC0 = posRB.offset( 4, 4 ); + posC0 = posRB.offset(4, 4); C0Avail = true; } - else //is the right bottom corner of CTU - { - // same as for last column but not last row - posC0 = posRB.offset( 4, 4 ); - } } - - if ( (C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol )) || (C1Avail && getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol ) ) ) + if ( ( C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol, false ) ) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol, false ) ) { - if ( pu.cu->imv == 0 ) - { - cColMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - else if ( pu.cu->imv == 2 ) - { - cColMv.roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } + cColMv.roundAffinePrecInternal2Amvr(pu.cu->imv); affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = cColMv; affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = cColMv; affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = cColMv; @@ -2866,127 +2082,20 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co } for (int i = 0; i < affiAMVPInfo.numCand; i++) - { - if ( pu.cu->imv != 1 ) - { - affiAMVPInfo.mvCandLT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - affiAMVPInfo.mvCandRT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - 
affiAMVPInfo.mvCandLB[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - } - - -} - -bool PU::addIBCMVPCand(const PredictionUnit &pu, const Position &pos, const MvpDir &eDir, AMVPInfo &info) -{ - CodingStructure &cs = *pu.cs; - const PredictionUnit *neibPU = NULL; - Position neibPos; - - switch (eDir) - { - case MD_LEFT: - neibPos = pos.offset(-1, 0); - break; - case MD_ABOVE: - neibPos = pos.offset(0, -1); - break; - case MD_ABOVE_RIGHT: - neibPos = pos.offset(1, -1); - break; - case MD_BELOW_LEFT: - neibPos = pos.offset(-1, 1); - break; - case MD_ABOVE_LEFT: - neibPos = pos.offset(-1, -1); - break; - default: - break; - } - - neibPU = cs.getPURestricted(neibPos, pu, pu.chType); - - if (neibPU == NULL || CU::isIBC(*neibPU->cu)==false) - { - return false; - } - - const MotionInfo& neibMi = neibPU->getMotionInfo(neibPos); - info.mvCand[info.numCand++] = neibMi.mv[REF_PIC_LIST_0]; - return true; -} - -bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info ) -{ - CodingStructure &cs = *pu.cs; - const PredictionUnit *neibPU = NULL; - Position neibPos; - - switch (eDir) - { - case MD_LEFT: - neibPos = pos.offset( -1, 0 ); - break; - case MD_ABOVE: - neibPos = pos.offset( 0, -1 ); - break; - case MD_ABOVE_RIGHT: - neibPos = pos.offset( 1, -1 ); - break; - case MD_BELOW_LEFT: - neibPos = pos.offset( -1, 1 ); - break; - case MD_ABOVE_LEFT: - neibPos = pos.offset( -1, -1 ); - break; - default: - break; - } - - neibPU = cs.getPURestricted( neibPos, pu, pu.chType ); - - if( neibPU == NULL || !CU::isInter( *neibPU->cu ) ) - { - return false; - } - - const MotionInfo& neibMi = neibPU->getMotionInfo( neibPos ); - - const int currRefPOC = cs.slice->getRefPic( eRefPicList, iRefIdx )->getPOC(); - const RefPicList eRefPicList2nd = ( eRefPicList == REF_PIC_LIST_0 ) ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0; - - for( int predictorSource = 0; predictorSource < 2; predictorSource++ ) // examine the indicated reference picture list, then if not available, examine the other list. - { - const RefPicList eRefPicListIndex = ( predictorSource == 0 ) ? eRefPicList : eRefPicList2nd; - const int neibRefIdx = neibMi.refIdx[eRefPicListIndex]; - - if( neibRefIdx >= 0 && currRefPOC == cs.slice->getRefPOC( eRefPicListIndex, neibRefIdx ) ) - { - info.mvCand[info.numCand++] = neibMi.mv[eRefPicListIndex]; - return true; - } - } - - return false; -} - -/** -* \param pInfo -* \param eRefPicList -* \param iRefIdx -* \param uiPartUnitIdx -* \param eDir -* \returns bool -*/ -bool PU::addMVPCandWithScaling( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info ) + { + affiAMVPInfo.mvCandLT[i].roundAffinePrecInternal2Amvr(pu.cu->imv); + affiAMVPInfo.mvCandRT[i].roundAffinePrecInternal2Amvr(pu.cu->imv); + affiAMVPInfo.mvCandLB[i].roundAffinePrecInternal2Amvr(pu.cu->imv); + } +} + +bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info ) { CodingStructure &cs = *pu.cs; - const Slice &slice = *cs.slice; const PredictionUnit *neibPU = NULL; Position neibPos; - switch( eDir ) + switch (eDir) { case MD_LEFT: neibPos = pos.offset( -1, 0 ); @@ -3009,53 +2118,33 @@ bool PU::addMVPCandWithScaling( const PredictionUnit &pu, const RefPicList &eRef neibPU = cs.getPURestricted( neibPos, pu, pu.chType ); - if (neibPU == NULL || !CU::isInter(*neibPU->cu) || !CU::isInter(*pu.cu)) + if( neibPU == NULL || !CU::isInter( *neibPU->cu ) ) { return false; } const MotionInfo& neibMi = neibPU->getMotionInfo( neibPos ); + const int currRefPOC = cs.slice->getRefPic( eRefPicList, iRefIdx )->getPOC(); const RefPicList eRefPicList2nd = ( eRefPicList == REF_PIC_LIST_0 ) ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0; - const int currPOC = slice.getPOC(); - const int currRefPOC = slice.getRefPic( eRefPicList, iRefIdx )->poc; - const bool bIsCurrRefLongTerm = slice.getRefPic( eRefPicList, iRefIdx )->longTerm; - const int neibPOC = currPOC; - for( int predictorSource = 0; predictorSource < 2; predictorSource++ ) // examine the indicated reference picture list, then if not available, examine the other list. { - const RefPicList eRefPicListIndex = (predictorSource == 0) ? eRefPicList : eRefPicList2nd; + const RefPicList eRefPicListIndex = ( predictorSource == 0 ) ? eRefPicList : eRefPicList2nd; const int neibRefIdx = neibMi.refIdx[eRefPicListIndex]; - if( neibRefIdx >= 0 ) - { - const bool bIsNeibRefLongTerm = slice.getRefPic(eRefPicListIndex, neibRefIdx)->longTerm; - - if (bIsCurrRefLongTerm == bIsNeibRefLongTerm) - { - Mv cMv = neibMi.mv[eRefPicListIndex]; - - if( !( bIsCurrRefLongTerm /* || bIsNeibRefLongTerm*/) ) - { - const int neibRefPOC = slice.getRefPOC( eRefPicListIndex, neibRefIdx ); - const int scale = xGetDistScaleFactor( currPOC, currRefPOC, neibPOC, neibRefPOC ); - - if( scale != 4096 ) - { - cMv = cMv.scaleMv( scale ); - } - } - info.mvCand[info.numCand++] = cMv; - return true; - } + if( neibRefIdx >= 0 && currRefPOC == cs.slice->getRefPOC( eRefPicListIndex, neibRefIdx ) ) + { + info.mvCand[info.numCand++] = neibMi.mv[eRefPicListIndex]; + return true; } } return false; } -void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const RefPicList eRefPicList2nd, const int currRefPOC, AMVPInfo &info, uint8_t imv) + +void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const int currRefPOC, AMVPInfo &info) { const Slice &slice = *(*pu.cs).slice; @@ -3063,6 +2152,7 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, auto &lut = CU::isIBC(*pu.cu) ? 
pu.cs->motionLut.lutIbc : pu.cs->motionLut.lut; int num_avai_candInLUT = (int) lut.size(); int num_allowedCand = std::min(MAX_NUM_HMVP_AVMPCANDS, num_avai_candInLUT); + const RefPicList eRefPicList2nd = (eRefPicList == REF_PIC_LIST_0) ? REF_PIC_LIST_1 : REF_PIC_LIST_0; for (int mrgIdx = 1; mrgIdx <= num_allowedCand; mrgIdx++) { @@ -3080,7 +2170,7 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, if (neibRefIdx >= 0 && (CU::isIBC(*pu.cu) || (currRefPOC == slice.getRefPOC(eRefPicListIndex, neibRefIdx)))) { Mv pmv = neibMi.mv[eRefPicListIndex]; - pmv.roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv); + pmv.roundTransPrecInternal2Amvr(pu.cu->imv); info.mvCand[info.numCand++] = pmv; if (info.numCand >= AMVP_MAX_NUM_CANDS) @@ -3098,16 +2188,21 @@ bool PU::isBipredRestriction(const PredictionUnit &pu) { return true; } + /* disable bi-prediction for 4x8/8x4 */ + if ( pu.cu->lumaSize().width + pu.cu->lumaSize().height == 12 ) + { + return true; + } return false; } -void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int modelIdx, int verNum, AffineMergeCtx& affMrgType ) +void PU::getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgType) { int cuW = pu.Y().width; int cuH = pu.Y().height; int vx, vy; int shift = MAX_CU_DEPTH; - int shiftHtoW = shift + g_aucLog2[cuW] - g_aucLog2[cuH]; + int shiftHtoW = shift + floorLog2(cuW) - floorLog2(cuH); // motion info Mv cMv[2][4]; @@ -3135,6 +2230,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], } } } + } else if ( verNum == 3 ) { @@ -3156,6 +2252,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], } } } + } if ( dir == 0 ) @@ -3182,16 +2279,19 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], case 1: // 1 : LT, 
RT, RB cMv[l][2].hor = cMv[l][3].hor + cMv[l][0].hor - cMv[l][1].hor; cMv[l][2].ver = cMv[l][3].ver + cMv[l][0].ver - cMv[l][1].ver; + cMv[l][2].clipToStorageBitDepth(); break; case 2: // 2 : LT, LB, RB cMv[l][1].hor = cMv[l][3].hor + cMv[l][0].hor - cMv[l][2].hor; cMv[l][1].ver = cMv[l][3].ver + cMv[l][0].ver - cMv[l][2].ver; + cMv[l][1].clipToStorageBitDepth(); break; case 3: // 3 : RT, LB, RB cMv[l][0].hor = cMv[l][1].hor + cMv[l][2].hor - cMv[l][3].hor; cMv[l][0].ver = cMv[l][1].ver + cMv[l][2].ver - cMv[l][3].ver; + cMv[l][0].clipToStorageBitDepth(); break; case 4: // 4 : LT, RT @@ -3202,6 +2302,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], vy = (cMv[l][0].ver << shift) - ((cMv[l][2].hor - cMv[l][0].hor) << shiftHtoW); roundAffineMv( vx, vy, shift ); cMv[l][1].set( vx, vy ); + cMv[l][1].clipToStorageBitDepth(); break; default: @@ -3229,6 +2330,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], } affMrgType.interDirNeighbours[affMrgType.numValidMergeCand] = dir; affMrgType.affineType[affMrgType.numValidMergeCand] = curType; + affMrgType.BcwIdx[affMrgType.numValidMergeCand] = (dir == 3) ? 
bcwIdx : BCW_DEFAULT; affMrgType.numValidMergeCand++; @@ -3301,7 +2403,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx { const CodingStructure &cs = *pu.cs; const Slice &slice = *pu.cs->slice; - const uint32_t maxNumAffineMergeCand = slice.getMaxNumAffineMergeCand(); + const uint32_t maxNumAffineMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand(); for ( int i = 0; i < maxNumAffineMergeCand; i++ ) { @@ -3313,7 +2415,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx affMrgCtx.interDirNeighbours[i] = 0; affMrgCtx.affineType[i] = AFFINEMODEL_4PARAM; affMrgCtx.mergeType[i] = MRG_TYPE_DEFAULT_N; - affMrgCtx.GBiIdx[i] = GBI_DEFAULT; + affMrgCtx.BcwIdx[i] = BCW_DEFAULT; } affMrgCtx.numValidMergeCand = 0; @@ -3321,7 +2423,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx bool enableSubPuMvp = slice.getSPS()->getSBTMVPEnabledFlag() && !(slice.getPOC() == slice.getRefPic(REF_PIC_LIST_0, 0)->getPOC() && slice.isIRAP()); bool isAvailableSubPu = false; - if ( enableSubPuMvp && slice.getEnableTMVPFlag() ) + if ( enableSubPuMvp && slice.getPicHeader()->getEnableTMVPFlag() ) { MergeCtx mrgCtx = *affMrgCtx.mrgCtx; bool tmpLICFlag = false; @@ -3415,7 +2517,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx } affMrgCtx.interDirNeighbours[affMrgCtx.numValidMergeCand] = puNeigh->interDir; affMrgCtx.affineType[affMrgCtx.numValidMergeCand] = (EAffineModel)(puNeigh->cu->affineType); - affMrgCtx.GBiIdx[affMrgCtx.numValidMergeCand] = puNeigh->cu->GBiIdx; + affMrgCtx.BcwIdx[affMrgCtx.numValidMergeCand] = puNeigh->cu->BcwIdx; if ( affMrgCtx.numValidMergeCand == mrgCandIdx ) { @@ -3436,6 +2538,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx MotionInfo mi[4]; bool isAvailable[4] = { false }; + int8_t neighBcw[2] = { BCW_DEFAULT, BCW_DEFAULT }; // control point: LT B2->B3->A2 const Position posLT[3] = { 
pu.Y().topLeft().offset( -1, -1 ), pu.Y().topLeft().offset( 0, -1 ), pu.Y().topLeft().offset( -1, 0 ) }; for ( int i = 0; i < 3; i++ ) @@ -3448,6 +2551,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx { isAvailable[0] = true; mi[0] = puNeigh->getMotionInfo( pos ); + neighBcw[0] = puNeigh->cu->BcwIdx; break; } } @@ -3465,6 +2569,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx { isAvailable[1] = true; mi[1] = puNeigh->getMotionInfo( pos ); + neighBcw[1] = puNeigh->cu->BcwIdx; break; } } @@ -3487,7 +2592,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx } // control point: RB - if ( slice.getEnableTMVPFlag() ) + if ( slice.getPicHeader()->getEnableTMVPFlag() ) { //>> MTK colocated-RightBottom // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to @@ -3499,34 +2604,17 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx if ( ((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight) ) { - Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask ); - - if ( (posInCtu.x + 4 < pcv.maxCUWidth) && // is not at the last column of CTU - (posInCtu.y + 4 < pcv.maxCUHeight) ) // is not at the last row of CTU + int posYInCtu = posRB.y & pcv.maxCUHeightMask; + if (posYInCtu + 4 < pcv.maxCUHeight) { - posC0 = posRB.offset( 4, 4 ); + posC0 = posRB.offset(4, 4); C0Avail = true; } - else if ( posInCtu.x + 4 < pcv.maxCUWidth ) // is not at the last column of CTU But is last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility - } - else if ( posInCtu.y + 4 < pcv.maxCUHeight ) // is not at the last row of CTU But is last column of CTU - { - posC0 = posRB.offset( 4, 4 ); - C0Avail = true; - } - else //is the right bottom corner of CTU - { - posC0 = 
posRB.offset( 4, 4 ); - // same as for last column but not last row - } } Mv cColMv; int refIdx = 0; - bool bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx ); + bool bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx, false ); if ( bExistMV ) { mi[3].mv[0] = cColMv; @@ -3537,7 +2625,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx if ( slice.isInterB() ) { - bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx ); + bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx, false ); if ( bExistMV ) { mi[3].mv[1] = cColMv; @@ -3565,7 +2653,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx for ( int idx = startIdx; idx < modelNum; idx++ ) { int modelIdx = order[idx]; - getAffineControlPointCand( pu, mi, isAvailable, model[modelIdx], modelIdx, verNum[modelIdx], affMrgCtx ); + getAffineControlPointCand(pu, mi, isAvailable, model[modelIdx], ((modelIdx == 3) ? 
neighBcw[1] : neighBcw[0]), modelIdx, verNum[modelIdx], affMrgCtx); if ( affMrgCtx.numValidMergeCand != 0 && affMrgCtx.numValidMergeCand - 1 == mrgCandIdx ) { return; @@ -3625,24 +2713,27 @@ void PU::setAllAffineMvField( PredictionUnit &pu, MvField *mvField, RefPicList e pu.refIdx[eRefList] = mvField[0].refIdx; } -void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool setHighPrec) +void PU::setAllAffineMv(PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool clipCPMVs) { int width = pu.Y().width; int shift = MAX_CU_DEPTH; - if (setHighPrec) + if (clipCPMVs) { - affLT.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - affRT.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - affLB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + affLT.mvCliptoStorageBitDepth(); + affRT.mvCliptoStorageBitDepth(); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + affLB.mvCliptoStorageBitDepth(); + } } int deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY; - deltaMvHorX = (affRT - affLT).getHor() << (shift - g_aucLog2[width]); - deltaMvHorY = (affRT - affLT).getVer() << (shift - g_aucLog2[width]); + deltaMvHorX = (affRT - affLT).getHor() << (shift - floorLog2(width)); + deltaMvHorY = (affRT - affLT).getVer() << (shift - floorLog2(width)); int height = pu.Y().height; if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - deltaMvVerX = (affLB - affLT).getHor() << (shift - g_aucLog2[height]); - deltaMvVerY = (affLB - affLT).getVer() << (shift - g_aucLog2[height]); + deltaMvVerX = (affLB - affLT).getHor() << (shift - floorLog2(height)); + deltaMvVerY = (affLB - affLT).getVer() << (shift - floorLog2(height)); } else { @@ -3660,12 +2751,22 @@ void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPi MotionBuf mb = pu.getMotionBuf(); int mvScaleTmpHor, mvScaleTmpVer; + const bool subblkMVSpreadOverLimit = InterPrediction::isSubblockVectorSpreadOverLimit( 
deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY, pu.interDir ); for ( int h = 0; h < pu.Y().height; h += blockHeight ) { for ( int w = 0; w < pu.Y().width; w += blockWidth ) { - mvScaleTmpHor = mvScaleHor + deltaMvHorX * (halfBW + w) + deltaMvVerX * (halfBH + h); - mvScaleTmpVer = mvScaleVer + deltaMvHorY * (halfBW + w) + deltaMvVerY * (halfBH + h); + if ( !subblkMVSpreadOverLimit ) + { + mvScaleTmpHor = mvScaleHor + deltaMvHorX * (halfBW + w) + deltaMvVerX * (halfBH + h); + mvScaleTmpVer = mvScaleVer + deltaMvHorY * (halfBW + w) + deltaMvVerY * (halfBH + h); + + } + else + { + mvScaleTmpHor = mvScaleHor + deltaMvHorX * ( pu.Y().width >> 1 ) + deltaMvVerX * ( pu.Y().height >> 1 ); + mvScaleTmpVer = mvScaleVer + deltaMvHorY * ( pu.Y().width >> 1 ) + deltaMvVerY * ( pu.Y().height >> 1 ); + } roundAffineMv( mvScaleTmpHor, mvScaleTmpVer, shift ); Mv curMv(mvScaleTmpHor, mvScaleTmpVer); curMv.clipToStorageBitDepth(); @@ -3685,93 +2786,15 @@ void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPi pu.mvAffi[eRefList][2] = affLB; } -static bool deriveScaledMotionTemporal( const Slice& slice, - const Position& colPos, - const Picture* pColPic, - const RefPicList eCurrRefPicList, - Mv& cColMv, - const RefPicList eFetchRefPicList) -{ - const MotionInfo &mi = pColPic->cs->getMotionInfo(colPos); - const Slice *pColSlice = nullptr; - - for (const auto &pSlice : pColPic->slices) - { - if (pSlice->getIndependentSliceIdx() == mi.sliceIdx) - { - pColSlice = pSlice; - break; - } - } - - CHECK(pColSlice == nullptr, "Couldn't find the colocated slice"); - - int iColPOC, iColRefPOC, iCurrPOC, iCurrRefPOC, iScale; - bool bAllowMirrorMV = true; - RefPicList eColRefPicList = slice.getCheckLDC() ? eCurrRefPicList : RefPicList(1 - eFetchRefPicList); - if (pColPic == slice.getRefPic(RefPicList(slice.isInterB() ? 
1 - slice.getColFromL0Flag() : 0), slice.getColRefIdx())) - { - eColRefPicList = eCurrRefPicList; //67 -> disable, 64 -> enable - bAllowMirrorMV = false; - } - - // Although it might make sense to keep the unavailable motion field per direction still be unavailable, I made the MV prediction the same way as in TMVP - // So there is an interaction between MV0 and MV1 of the corresponding blocks identified by TV. - - // Grab motion and do necessary scaling.{{ - iCurrPOC = slice.getPOC(); - - int iColRefIdx = mi.refIdx[eColRefPicList]; - - if (iColRefIdx < 0 && (slice.getCheckLDC() || bAllowMirrorMV)) - { - eColRefPicList = RefPicList(1 - eColRefPicList); - iColRefIdx = mi.refIdx[eColRefPicList]; - - if (iColRefIdx < 0) - { - return false; - } - } - - if (iColRefIdx >= 0 && slice.getNumRefIdx(eCurrRefPicList) > 0) - { - iColPOC = pColSlice->getPOC(); - iColRefPOC = pColSlice->getRefPOC(eColRefPicList, iColRefIdx); - if (iColPOC == iColRefPOC) - return false; - /////////////////////////////////////////////////////////////// - // Set the target reference index to 0, may be changed later // - /////////////////////////////////////////////////////////////// - iCurrRefPOC = slice.getRefPic(eCurrRefPicList, 0)->getPOC(); - // Scale the vector. 
- cColMv = mi.mv[eColRefPicList]; - cColMv.setHor(roundMvComp(cColMv.getHor())); - cColMv.setVer(roundMvComp(cColMv.getVer())); - //pcMvFieldSP[2*iPartition + eCurrRefPicList].getMv(); - // Assume always short-term for now - iScale = xGetDistScaleFactor(iCurrPOC, iCurrRefPOC, iColPOC, iColRefPOC); - - if (iScale != 4096) - { - - cColMv = cColMv.scaleMv(iScale); - } - - return true; - } - return false; -} - void clipColPos(int& posX, int& posY, const PredictionUnit& pu) { Position puPos = pu.lumaPos(); - int log2CtuSize = g_aucLog2[pu.cs->sps->getCTUSize()]; + int log2CtuSize = floorLog2(pu.cs->sps->getCTUSize()); int ctuX = ((puPos.x >> log2CtuSize) << log2CtuSize); int ctuY = ((puPos.y >> log2CtuSize) << log2CtuSize); - int horMax = std::min((int)pu.cs->sps->getPicWidthInLumaSamples() - 1, ctuX + (int)pu.cs->sps->getCTUSize() + 3); + int horMax = std::min( (int)pu.cs->pps->getPicWidthInLumaSamples() - 1, ctuX + (int)pu.cs->sps->getCTUSize() + 3 ); int horMin = std::max((int)0, ctuX); - int verMax = std::min((int)pu.cs->sps->getPicHeightInLumaSamples() - 1, ctuY + (int)pu.cs->sps->getCTUSize() - 1); + int verMax = std::min( (int)pu.cs->pps->getPicHeightInLumaSamples() - 1, ctuY + (int)pu.cs->sps->getCTUSize() - 1 ); int verMin = std::max((int)0, ctuY); posX = std::min(horMax, std::max(horMin, posX)); @@ -3788,29 +2811,22 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b const Picture *pColPic = slice.getRefPic(RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0), slice.getColRefIdx()); Mv cTMv; - RefPicList fetchRefPicList = RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0); - bool terminate = false; - for (unsigned currRefListId = 0; currRefListId < (slice.getSliceType() == B_SLICE ? 
2 : 1) && !terminate; currRefListId++) + if ( count ) { - if ( count ) + if ( (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_0)) && slice.getRefPic( REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].refIdx ) == pColPic ) { - RefPicList currRefPicList = RefPicList(slice.getCheckLDC() ? (slice.getColFromL0Flag() ? currRefListId : 1 - currRefListId) : currRefListId); - - if ((mrgCtx.interDirNeighbours[0] & (1 << currRefPicList)) && slice.getRefPic(currRefPicList, mrgCtx.mvFieldNeighbours[0 * 2 + currRefPicList].refIdx) == pColPic) - { - cTMv = mrgCtx.mvFieldNeighbours[0 * 2 + currRefPicList].mv; - terminate = true; - fetchRefPicList = currRefPicList; - break; - } + cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].mv; + } + else if ( slice.isInterB() && (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_1)) && slice.getRefPic( REF_PIC_LIST_1, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].refIdx ) == pColPic ) + { + cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].mv; } } /////////////////////////////////////////////////////////////////////// //////// GET Initial Temporal Vector //////// /////////////////////////////////////////////////////////////////////// - int mvPrec = MV_FRACTIONAL_BITS_INTERNAL; Mv cTempVector = cTMv; bool tempLICFlag = false; @@ -3824,6 +2840,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b int puWidth = numPartLine == 1 ? puSize.width : 1 << ATMVP_SUB_BLOCK_SIZE; Mv cColMv; + int refIdx = 0; // use coldir. 
bool bBSlice = slice.isInterB(); @@ -3831,8 +2848,10 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b bool found = false; cTempVector = cTMv; - int tempX = cTempVector.getHor() >> mvPrec; - int tempY = cTempVector.getVer() >> mvPrec; + + cTempVector.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT); + int tempX = cTempVector.getHor(); + int tempY = cTempVector.getVer(); centerPos.x = puPos.x + (puSize.width >> 1) + tempX; centerPos.y = puPos.y + (puSize.height >> 1) + tempY; @@ -3852,13 +2871,13 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b { RefPicList currRefPicList = RefPicList(currRefListId); - if (deriveScaledMotionTemporal(slice, centerPos, pColPic, currRefPicList, cColMv, fetchRefPicList)) + if (getColocatedMVP(pu, currRefPicList, centerPos, cColMv, refIdx, true)) { // set as default, for further motion vector field spanning mrgCtx.mvFieldNeighbours[(count << 1) + currRefListId].setMvField(cColMv, 0); mrgCtx.interDirNeighbours[count] |= (1 << currRefListId); LICFlag = tempLICFlag; - mrgCtx.GBiIdx[count] = GBI_DEFAULT; + mrgCtx.BcwIdx[count] = BCW_DEFAULT; found = true; } else @@ -3905,7 +2924,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b for (unsigned currRefListId = 0; currRefListId < (bBSlice ? 
2 : 1); currRefListId++) { RefPicList currRefPicList = RefPicList(currRefListId); - if (deriveScaledMotionTemporal(slice, colPos, pColPic, currRefPicList, cColMv, fetchRefPicList)) + if (getColocatedMVP(pu, currRefPicList, colPos, cColMv, refIdx, true)) { mi.refIdx[currRefListId] = 0; mi.mv[currRefListId] = cColMv; @@ -3935,7 +2954,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b } } return true; - } +} void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) { @@ -3954,6 +2973,7 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx ) if( mi.isInter ) { mi.interDir = pu.interDir; + mi.useAltHpelIf = pu.cu->imv == IMV_HPEL; for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ ) { @@ -4026,469 +3046,147 @@ void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interP { if( pu.interDir != 2 /* PRED_L1 */ ) { - pu.mvd[0].changePrecisionAmvr( pu.cu->imv, MV_PRECISION_QUARTER); - unsigned mvp_idx = pu.mvpIdx[0]; - AMVPInfo amvpInfo; - if (CU::isIBC(*pu.cu)) - { - PU::fillIBCMvpCand(pu, amvpInfo); - } - else - PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo); - pu.mvpNum[0] = amvpInfo.numCand; - pu.mvpIdx[0] = mvp_idx; - pu.mv [0] = amvpInfo.mvCand[mvp_idx] + pu.mvd[0]; - pu.mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } - - if (pu.interDir != 1 /* PRED_L0 */) - { - if( !( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 ) && pu.cu->imv )/* PRED_BI */ - { - pu.mvd[1].changePrecisionAmvr(pu.cu->imv, MV_PRECISION_QUARTER); - } - unsigned mvp_idx = pu.mvpIdx[1]; - AMVPInfo amvpInfo; - PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo); - pu.mvpNum[1] = amvpInfo.numCand; - pu.mvpIdx[1] = mvp_idx; - pu.mv [1] = amvpInfo.mvCand[mvp_idx] + pu.mvd[1]; - pu.mv[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } - } - else - { - // this function is never called for merge - THROW("unexpected"); - PU::getInterMergeCandidates ( pu, mrgCtx 
- , 0 - ); - - mrgCtx.setMergeInfo( pu, pu.mergeIdx ); - } - - PU::spanMotionInfo( pu, mrgCtx ); -} - -bool PU::isBiPredFromDifferentDir( const PredictionUnit& pu ) -{ - if ( pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 ) - { - const int iPOC0 = pu.cu->slice->getRefPOC( REF_PIC_LIST_0, pu.refIdx[0] ); - const int iPOC1 = pu.cu->slice->getRefPOC( REF_PIC_LIST_1, pu.refIdx[1] ); - const int iPOC = pu.cu->slice->getPOC(); - if ( (iPOC - iPOC0)*(iPOC - iPOC1) < 0 ) - { - return true; - } - } - - return false; -} -bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu) -{ - if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) - { - const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]); - const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]); - const int poc = pu.cu->slice->getPOC(); - if ((poc - poc0)*(poc - poc1) < 0) - { - if (abs(poc - poc0) == abs(poc - poc1)) - { - return true; - } - } - } - return false; -} -void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx ) -{ - if( PU::isBipredRestriction( pu ) ) - { - for( uint32_t mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; ++mergeCand ) - { - if( mergeCtx.interDirNeighbours[ mergeCand ] == 3 ) - { - mergeCtx.interDirNeighbours[ mergeCand ] = 1; - mergeCtx.mvFieldNeighbours[( mergeCand << 1 ) + 1].setMvField( Mv( 0, 0 ), -1 ); - mergeCtx.GBiIdx[mergeCand] = GBI_DEFAULT; - } - } - } -} - -void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu) -{ - if (PU::isBipredRestriction(pu)) - { - if (pu.interDir == 3) - { - pu.interDir = 1; - pu.refIdx[1] = -1; - pu.mv[1] = Mv(0, 0); - pu.cu->GBiIdx = GBI_DEFAULT; - } - } -} - -void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx ) -{ - const CodingStructure &cs = *pu.cs; - const Slice &slice = *pu.cs->slice; - const int32_t maxNumMergeCand = TRIANGLE_MAX_NUM_UNI_CANDS; - triangleMrgCtx.numValidMergeCand = 0; - - for( int32_t i = 0; i < maxNumMergeCand; i++ ) - { - 
triangleMrgCtx.interDirNeighbours[i] = 0; - triangleMrgCtx.mrgTypeNeighbours [i] = MRG_TYPE_DEFAULT_N; - triangleMrgCtx.mvFieldNeighbours[(i << 1) ].refIdx = NOT_VALID; - triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID; - triangleMrgCtx.mvFieldNeighbours[(i << 1) ].mv = Mv(); - triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].mv = Mv(); - } - - MotionInfo candidate[TRIANGLE_MAX_NUM_CANDS_MEM]; - int32_t candCount = 0; - - const Position posLT = pu.Y().topLeft(); - const Position posRT = pu.Y().topRight(); - const Position posLB = pu.Y().bottomLeft(); - - MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft; - - //left - const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType ); - const bool isAvailableA1 = puLeft && isDiffMER( pu, *puLeft ) && pu.cu != puLeft->cu && CU::isInter( *puLeft->cu ) - ; - if( isAvailableA1 ) - { - miLeft = puLeft->getMotionInfo( posLB.offset(-1, 0) ); - candidate[candCount].isInter = true; - candidate[candCount].interDir = miLeft.interDir; - candidate[candCount].mv[0] = miLeft.mv[0]; - candidate[candCount].mv[1] = miLeft.mv[1]; - candidate[candCount].refIdx[0] = miLeft.refIdx[0]; - candidate[candCount].refIdx[1] = miLeft.refIdx[1]; - candCount++; - } - - // above - const PredictionUnit *puAbove = cs.getPURestricted( posRT.offset( 0, -1 ), pu, pu.chType ); - bool isAvailableB1 = puAbove && isDiffMER( pu, *puAbove ) && pu.cu != puAbove->cu && CU::isInter( *puAbove->cu ) - ; - if( isAvailableB1 ) - { - miAbove = puAbove->getMotionInfo( posRT.offset( 0, -1 ) ); - - if( !isAvailableA1 || ( miAbove != miLeft ) ) - { - candidate[candCount].isInter = true; - candidate[candCount].interDir = miAbove.interDir; - candidate[candCount].mv[0] = miAbove.mv[0]; - candidate[candCount].mv[1] = miAbove.mv[1]; - candidate[candCount].refIdx[0] = miAbove.refIdx[0]; - candidate[candCount].refIdx[1] = miAbove.refIdx[1]; - candCount++; - } - } - - // above right - const PredictionUnit *puAboveRight = 
cs.getPURestricted( posRT.offset( 1, -1 ), pu, pu.chType ); - bool isAvailableB0 = puAboveRight && isDiffMER( pu, *puAboveRight ) && CU::isInter( *puAboveRight->cu ) - ; - - if( isAvailableB0 ) - { - miAboveRight = puAboveRight->getMotionInfo( posRT.offset( 1, -1 ) ); - - if( ( !isAvailableB1 || ( miAbove != miAboveRight ) ) && ( !isAvailableA1 || ( miLeft != miAboveRight ) ) ) - { - candidate[candCount].isInter = true; - candidate[candCount].interDir = miAboveRight.interDir; - candidate[candCount].mv[0] = miAboveRight.mv[0]; - candidate[candCount].mv[1] = miAboveRight.mv[1]; - candidate[candCount].refIdx[0] = miAboveRight.refIdx[0]; - candidate[candCount].refIdx[1] = miAboveRight.refIdx[1]; - candCount++; - } - } - - //left bottom - const PredictionUnit *puLeftBottom = cs.getPURestricted( posLB.offset( -1, 1 ), pu, pu.chType ); - bool isAvailableA0 = puLeftBottom && isDiffMER( pu, *puLeftBottom ) && CU::isInter( *puLeftBottom->cu ) - ; - if( isAvailableA0 ) - { - miBelowLeft = puLeftBottom->getMotionInfo( posLB.offset( -1, 1 ) ); - - if( ( !isAvailableA1 || ( miBelowLeft != miLeft ) ) && ( !isAvailableB1 || ( miBelowLeft != miAbove ) ) && ( !isAvailableB0 || ( miBelowLeft != miAboveRight ) ) ) - { - candidate[candCount].isInter = true; - candidate[candCount].interDir = miBelowLeft.interDir; - candidate[candCount].mv[0] = miBelowLeft.mv[0]; - candidate[candCount].mv[1] = miBelowLeft.mv[1]; - candidate[candCount].refIdx[0] = miBelowLeft.refIdx[0]; - candidate[candCount].refIdx[1] = miBelowLeft.refIdx[1]; - candCount++; - } - } - - // above left - const PredictionUnit *puAboveLeft = cs.getPURestricted( posLT.offset( -1, -1 ), pu, pu.chType ); - bool isAvailableB2 = puAboveLeft && isDiffMER( pu, *puAboveLeft ) && CU::isInter( *puAboveLeft->cu ) - ; - - if( isAvailableB2 ) - { - miAboveLeft = puAboveLeft->getMotionInfo( posLT.offset( -1, -1 ) ); - - if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) && ( 
!isAvailableA0 || ( miBelowLeft != miAboveLeft ) ) && ( !isAvailableB0 || ( miAboveRight != miAboveLeft ) ) ) - { - candidate[candCount].isInter = true; - candidate[candCount].interDir = miAboveLeft.interDir; - candidate[candCount].mv[0] = miAboveLeft.mv[0]; - candidate[candCount].mv[1] = miAboveLeft.mv[1]; - candidate[candCount].refIdx[0] = miAboveLeft.refIdx[0]; - candidate[candCount].refIdx[1] = miAboveLeft.refIdx[1]; - candCount++; - } - } - - if( slice.getEnableTMVPFlag() ) - { - Position posRB = pu.Y().bottomRight().offset(-3, -3); - - const PreCalcValues& pcv = *cs.pcv; - - Position posC0; - Position posC1 = pu.Y().center(); - bool isAvailableC0 = false; - bool isAvailableC1 = (posC1.x < pcv.lumaWidth) && (posC1.y < pcv.lumaHeight); - - if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight)) - { - Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask ); - - if( ( posInCtu.x + 4 < pcv.maxCUWidth ) && // is not at the last column of CTU - ( posInCtu.y + 4 < pcv.maxCUHeight ) ) // is not at the last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - isAvailableC0 = true; - } - else if( posInCtu.x + 4 < pcv.maxCUWidth ) // is not at the last column of CTU But is last row of CTU - { - posC0 = posRB.offset( 4, 4 ); - // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility - } - else if( posInCtu.y + 4 < pcv.maxCUHeight ) // is not at the last row of CTU But is last column of CTU - { - posC0 = posRB.offset( 4, 4 ); - isAvailableC0 = true; - } - else //is the right bottom corner of CTU + pu.mvd[0].changeTransPrecAmvr2Internal(pu.cu->imv); + unsigned mvp_idx = pu.mvpIdx[0]; + AMVPInfo amvpInfo; + if (CU::isIBC(*pu.cu)) { - posC0 = posRB.offset( 4, 4 ); - // same as for last column but not last row + PU::fillIBCMvpCand(pu, amvpInfo); } + else + PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo); + pu.mvpNum[0] = amvpInfo.numCand; + pu.mvpIdx[0] 
= mvp_idx; + pu.mv [0] = amvpInfo.mvCand[mvp_idx] + pu.mvd[0]; + pu.mv[0].mvCliptoStorageBitDepth(); } - // C0 - Mv cColMv; - int32_t refIdx = 0; - bool existMV = ( isAvailableC0 && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx ) ); - MotionInfo temporalMv; - temporalMv.interDir = 0; - if( existMV ) - { - temporalMv.isInter = true; - temporalMv.interDir |= 1; - temporalMv.mv[0] = cColMv; - temporalMv.refIdx[0] = refIdx; - } - existMV = ( isAvailableC0 && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx ) ); - if( existMV ) + if (pu.interDir != 1 /* PRED_L0 */) { - temporalMv.interDir |= 2; - temporalMv.mv[1] = cColMv; - temporalMv.refIdx[1] = refIdx; + if( !( pu.cu->cs->picHeader->getMvdL1ZeroFlag() && pu.interDir == 3 ) && pu.cu->imv )/* PRED_BI */ + { + pu.mvd[1].changeTransPrecAmvr2Internal(pu.cu->imv); + } + unsigned mvp_idx = pu.mvpIdx[1]; + AMVPInfo amvpInfo; + PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo); + pu.mvpNum[1] = amvpInfo.numCand; + pu.mvpIdx[1] = mvp_idx; + pu.mv [1] = amvpInfo.mvCand[mvp_idx] + pu.mvd[1]; + pu.mv[1].mvCliptoStorageBitDepth(); } + } + else + { + // this function is never called for merge + THROW("unexpected"); + PU::getInterMergeCandidates ( pu, mrgCtx + , 0 + ); - if( temporalMv.interDir != 0 ) - { - candidate[candCount].isInter = true; - candidate[candCount].interDir = temporalMv.interDir; - candidate[candCount].mv[0] = temporalMv.mv[0]; - candidate[candCount].mv[1] = temporalMv.mv[1]; - candidate[candCount].refIdx[0] = temporalMv.refIdx[0]; - candidate[candCount].refIdx[1] = temporalMv.refIdx[1]; - candCount++; - } + mrgCtx.setMergeInfo( pu, pu.mergeIdx ); + } + + PU::spanMotionInfo( pu, mrgCtx ); +} - // C1 - temporalMv.interDir = 0; - existMV = isAvailableC1 && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, refIdx ); - if( existMV ) - { - temporalMv.isInter = true; - temporalMv.interDir |= 1; - temporalMv.mv[0] = cColMv; - temporalMv.refIdx[0] = refIdx; - } - existMV = isAvailableC1 && 
getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, refIdx ); - if( existMV ) - { - temporalMv.interDir |= 2; - temporalMv.mv[1] = cColMv; - temporalMv.refIdx[1] = refIdx; - } - if( temporalMv.interDir != 0 ) +bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu) +{ + if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0) + { + if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->longTerm + || pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->longTerm) { - candidate[candCount].isInter = true; - candidate[candCount].interDir = temporalMv.interDir; - candidate[candCount].mv[0] = temporalMv.mv[0]; - candidate[candCount].mv[1] = temporalMv.mv[1]; - candidate[candCount].refIdx[0] = temporalMv.refIdx[0]; - candidate[candCount].refIdx[1] = temporalMv.refIdx[1]; - candCount++; + return false; } - } - // put uni-prediction candidate to the triangle candidate list - for( int32_t i = 0; i < candCount; i++ ) - { - if( candidate[i].interDir != 3 ) + const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]); + const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]); + const int poc = pu.cu->slice->getPOC(); + if ((poc - poc0)*(poc - poc1) < 0) { - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = candidate[i].interDir; - triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].mv = candidate[i].mv[0]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = candidate[i].mv[1]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].refIdx = candidate[i].refIdx[0]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = candidate[i].refIdx[1]; - triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx); - if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS ) + if (abs(poc - poc0) == 
abs(poc - poc1)) { - return; + return true; } } } + return false; +} - // put L0 mv of bi-prediction candidate to the triangle candidate list - for( int32_t i = 0; i < candCount; i++ ) +void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu) +{ + if (PU::isBipredRestriction(pu)) { - if( candidate[i].interDir == 3 ) + if (pu.interDir == 3) { - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1; - triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].mv = candidate[i].mv[0]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv(0, 0); - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].refIdx = candidate[i].refIdx[0]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = -1; - triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx); - if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS ) - { - return; - } + pu.interDir = 1; + pu.refIdx[1] = -1; + pu.mv[1] = Mv(0, 0); + pu.cu->BcwIdx = BCW_DEFAULT; } } +} + +void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx ) +{ + MergeCtx tmpMergeCtx; + + const Slice &slice = *pu.cs->slice; + const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumMergeCand(); - // put L1 mv of bi-prediction candidate to the triangle candidate list - for( int32_t i = 0; i < candCount; i++ ) + triangleMrgCtx.numValidMergeCand = 0; + + for (int32_t i = 0; i < TRIANGLE_MAX_NUM_UNI_CANDS; i++) { - if( candidate[i].interDir == 3 ) - { - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2; - triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].mv = Mv(0, 0); - triangleMrgCtx.mvFieldNeighbours 
[(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = candidate[i].mv[1]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].refIdx = -1; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = candidate[i].refIdx[1]; - triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx); - if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS ) - { - return; - } - } + triangleMrgCtx.BcwIdx[i] = BCW_DEFAULT; + triangleMrgCtx.interDirNeighbours[i] = 0; + triangleMrgCtx.mrgTypeNeighbours[i] = MRG_TYPE_DEFAULT_N; + triangleMrgCtx.mvFieldNeighbours[(i << 1)].refIdx = NOT_VALID; + triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID; + triangleMrgCtx.mvFieldNeighbours[(i << 1)].mv = Mv(); + triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].mv = Mv(); + triangleMrgCtx.useAltHpelIf[i] = false; } - // put average of L0 and L1 mvs of bi-prediction candidate to the triangle candidate list - for( int32_t i = 0; i < candCount; i++ ) + PU::getInterMergeCandidates(pu, tmpMergeCtx, 0); + + for (int32_t i = 0; i < maxNumMergeCand; i++) { - if( candidate[i].interDir == 3 ) + int parity = i & 1; + if (tmpMergeCtx.interDirNeighbours[i] & (0x01 + parity)) { - int32_t curPicPoc = slice.getPOC(); - int32_t refPicPocL0 = slice.getRefPOC(REF_PIC_LIST_0, candidate[i].refIdx[0]); - int32_t refPicPocL1 = slice.getRefPOC(REF_PIC_LIST_1, candidate[i].refIdx[1]); - Mv aveMv = candidate[i].mv[1]; - int32_t distscale = xGetDistScaleFactor( curPicPoc, refPicPocL0, curPicPoc, refPicPocL1 ); - if( distscale != 4096 ) - { - aveMv = aveMv.scaleMv( distscale ); // scaling to L0 - } - aveMv = aveMv + candidate[i].mv[0]; - roundAffineMv(aveMv.hor, aveMv.ver, 1); - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1; - triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].mv = 
aveMv; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv(0, 0); - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) ].refIdx = candidate[i].refIdx[0]; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = -1; - triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx); - if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS ) + triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1 + parity; + triangleMrgCtx.mrgTypeNeighbours[triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].mv = Mv(0, 0); + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].mv; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].refIdx = -1; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].refIdx; + triangleMrgCtx.numValidMergeCand++; + if (triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS) { return; } + continue; } - } - // fill with Mv(0, 0) - int32_t numRefIdx = std::min( slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1) ); - int32_t cnt = 0; - while( triangleMrgCtx.numValidMergeCand < TRIANGLE_MAX_NUM_UNI_CANDS ) - { - if( cnt < numRefIdx ) + if (tmpMergeCtx.interDirNeighbours[i] & (0x02 - parity)) { - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1; - triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].setMvField(Mv(0, 0), cnt); - triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = NOT_VALID; - triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv(); + 
triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2 - parity; + triangleMrgCtx.mrgTypeNeighbours[triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].mv; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0); + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx; + triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].refIdx = -1; triangleMrgCtx.numValidMergeCand++; - - if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS ) + if (triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS) { return; } - - triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2; - triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1 ].setMvField(Mv(0, 0), cnt); - triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].refIdx = NOT_VALID; - triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].mv = Mv(); - triangleMrgCtx.numValidMergeCand++; - - cnt = (cnt + 1) % numRefIdx; - } - } -} - -bool PU::isUniqueTriangleCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx ) -{ - int newCand = triangleMrgCtx.numValidMergeCand; - for( int32_t i = 0; i < newCand; i++ ) - { - int32_t predFlagCur = triangleMrgCtx.interDirNeighbours[i] == 1 ? 0 : 1; - int32_t predFlagNew = triangleMrgCtx.interDirNeighbours[newCand] == 1 ? 
0 : 1; - int32_t refPicPocCur = pu.cs->slice->getRefPOC( (RefPicList)predFlagCur, triangleMrgCtx.mvFieldNeighbours[(i << 1) + predFlagCur].refIdx ); - int32_t refPicPocNew = pu.cs->slice->getRefPOC( (RefPicList)predFlagNew, triangleMrgCtx.mvFieldNeighbours[(newCand << 1) + predFlagNew].refIdx); - if( refPicPocCur == refPicPocNew && triangleMrgCtx.mvFieldNeighbours[(i << 1) + predFlagCur].mv == triangleMrgCtx.mvFieldNeighbours[(newCand << 1) + predFlagNew].mv ) - { - return false; } } - return true; } - void PU::spanTriangleMotionInfo( PredictionUnit &pu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 ) { pu.triangleSplitDir = splitDir; @@ -4518,49 +3216,23 @@ void PU::spanTriangleMotionInfo( PredictionUnit &pu, MergeCtx &triangleMrgCtx, c } else if( triangleMrgCtx.interDirNeighbours[candIdx0] == 1 && triangleMrgCtx.interDirNeighbours[candIdx1] == 1 ) { - int32_t refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_0, triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx ), REF_PIC_LIST_1 ); - if( refIdx != -1 ) - { - biMv.interDir = 3; - biMv.mv[0] = triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv; - biMv.mv[1] = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv; - biMv.refIdx[0] = triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx; - biMv.refIdx[1] = refIdx; - } - else - { - refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_0, triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx), REF_PIC_LIST_1 ); - biMv.interDir = ( refIdx != -1 ) ? 3 : 1; - biMv.mv[0] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv : triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv; - biMv.mv[1] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv : Mv(0, 0); - biMv.refIdx[0] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx : triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx; - biMv.refIdx[1] = ( refIdx != -1 ) ? 
refIdx : -1; - } + biMv.interDir = 1; + biMv.mv[0] = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv; + biMv.mv[1] = Mv(0, 0); + biMv.refIdx[0] = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx; + biMv.refIdx[1] = -1; } else if( triangleMrgCtx.interDirNeighbours[candIdx0] == 2 && triangleMrgCtx.interDirNeighbours[candIdx1] == 2 ) { - int32_t refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_1, triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx ), REF_PIC_LIST_0 ); - if( refIdx != -1 ) - { - biMv.interDir = 3; - biMv.mv[0] = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv; - biMv.mv[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv; - biMv.refIdx[0] = refIdx; - biMv.refIdx[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx; - } - else - { - refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_1, triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx ), REF_PIC_LIST_0 ); - biMv.interDir = ( refIdx != -1 ) ? 3 : 2; - biMv.mv[0] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv : Mv(0, 0); - biMv.mv[1] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv : triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv; - biMv.refIdx[0] = ( refIdx != -1 ) ? refIdx : -1; - biMv.refIdx[1] = ( refIdx != -1 ) ? 
triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx : triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx; - } + biMv.interDir = 2; + biMv.mv[0] = Mv(0, 0); + biMv.mv[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv; + biMv.refIdx[0] = -1; + biMv.refIdx[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx; } - int32_t idxW = (int32_t)(g_aucLog2[pu.lwidth() ] - MIN_CU_LOG2); - int32_t idxH = (int32_t)(g_aucLog2[pu.lheight()] - MIN_CU_LOG2); + int32_t idxW = (int32_t)(floorLog2(pu.lwidth() ) - MIN_CU_LOG2); + int32_t idxH = (int32_t)(floorLog2(pu.lheight()) - MIN_CU_LOG2); for( int32_t y = 0; y < mb.height; y++ ) { for( int32_t x = 0; x < mb.width; x++ ) @@ -4613,69 +3285,6 @@ int32_t PU::mappingRefPic( const PredictionUnit &pu, int32_t refPicPoc, bool tar return -1; } -void CU::resetMVDandMV2Int( CodingUnit& cu, InterPrediction *interPred ) -{ - for( auto &pu : CU::traversePUs( cu ) ) - { - MergeCtx mrgCtx; - - if( !pu.mergeFlag ) - { - if( pu.interDir != 2 /* PRED_L1 */ ) - { - Mv mv = pu.mv[0]; - Mv mvPred; - AMVPInfo amvpInfo; - if (CU::isIBC(*pu.cu)) - PU::fillIBCMvpCand(pu, amvpInfo); - else - PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo); - pu.mvpNum[0] = amvpInfo.numCand; - - mvPred = amvpInfo.mvCand[pu.mvpIdx[0]]; - mv.roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, cu.imv); - pu.mv[0] = mv; - Mv mvDiff = mv - mvPred; - pu.mvd[0] = mvDiff; - } - if( pu.interDir != 1 /* PRED_L0 */ ) - { - Mv mv = pu.mv[1]; - Mv mvPred; - AMVPInfo amvpInfo; - PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo); - pu.mvpNum[1] = amvpInfo.numCand; - - mvPred = amvpInfo.mvCand[pu.mvpIdx[1]]; - mv.roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, cu.imv); - Mv mvDiff = mv - mvPred; - - if( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ ) - { - pu.mvd[1] = Mv(); - mv = mvPred; - } - else - { - pu.mvd[1] = mvDiff; - } - pu.mv[1] = mv; - } - - } - else - { - PU::getInterMergeCandidates ( pu, 
mrgCtx - , 0 - ); - - mrgCtx.setMergeInfo( pu, pu.mergeIdx ); - } - - PU::spanMotionInfo( pu, mrgCtx ); - } -} - bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu ) { bool bNonZeroMvd = false; @@ -4691,7 +3300,7 @@ bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu ) } if( pu.interDir != 1 /* PRED_L0 */ ) { - if( !pu.cu->cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) + if( !pu.cu->cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) { bNonZeroMvd |= pu.mvd[REF_PIC_LIST_1].getHor() != 0; bNonZeroMvd |= pu.mvd[REF_PIC_LIST_1].getVer() != 0; @@ -4727,7 +3336,7 @@ bool CU::hasSubCUNonZeroAffineMVd( const CodingUnit& cu ) if ( pu.interDir != 1 /* PRED_L0 */ ) { - if ( !pu.cu->cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) + if ( !pu.cu->cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) { for ( int i = 0; i < ( cu.affineType == AFFINEMODEL_6PARAM ? 3 : 2 ); i++ ) { @@ -4742,30 +3351,6 @@ bool CU::hasSubCUNonZeroAffineMVd( const CodingUnit& cu ) return nonZeroAffineMvd; } -int CU::getMaxNeighboriMVCandNum( const CodingStructure& cs, const Position& pos ) -{ - const int numDefault = 0; - int maxImvNumCand = 0; - - // Get BCBP of left PU -#if HEVC_TILES_WPP - const CodingUnit *cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), cs.slice->getIndependentSliceIdx(), cs.picture->tileMap->getTileIdxMap( pos ), CH_L ); -#else - const CodingUnit *cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), cs.slice->getIndependentSliceIdx(), CH_L ); -#endif - maxImvNumCand = ( cuLeft ) ? cuLeft->imvNumCand : numDefault; - - // Get BCBP of above PU -#if HEVC_TILES_WPP - const CodingUnit *cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), cs.slice->getIndependentSliceIdx(), cs.picture->tileMap->getTileIdxMap( pos ), CH_L ); -#else - const CodingUnit *cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), cs.slice->getIndependentSliceIdx(), CH_L ); -#endif - maxImvNumCand = std::max( maxImvNumCand, ( cuAbove ) ? 
cuAbove->imvNumCand : numDefault ); - - return maxImvNumCand; -} - uint8_t CU::getSbtInfo( uint8_t idx, uint8_t pos ) { return ( pos << 4 ) + ( idx << 0 ); @@ -4856,11 +3441,6 @@ uint8_t CU::numSbtModeRdo( uint8_t sbtAllowed ) return sum; } -bool CU::isMtsMode( const uint8_t sbtInfo ) -{ - return getSbtIdx( sbtInfo ) == SBT_OFF_MTS; -} - bool CU::isSbtMode( const uint8_t sbtInfo ) { uint8_t sbtIdx = getSbtIdx( sbtInfo ); @@ -4879,11 +3459,34 @@ bool CU::isSameSbtSize( const uint8_t sbtInfo1, const uint8_t sbtInfo2 ) return false; } -bool CU::isGBiIdxCoded( const CodingUnit &cu ) +bool CU::isPredRegDiffFromTB(const CodingUnit &cu, const ComponentID compID) +{ + return (compID == COMPONENT_Y) + && (cu.ispMode == VER_INTRA_SUBPARTITIONS && + CU::isMinWidthPredEnabledForBlkSize(cu.blocks[compID].width, cu.blocks[compID].height) + ); +} + +bool CU::isMinWidthPredEnabledForBlkSize(const int w, const int h) +{ + return ((w == 8 && h > 4) || w == 4); +} + +bool CU::isFirstTBInPredReg(const CodingUnit& cu, const ComponentID compID, const CompArea &area) +{ + return (compID == COMPONENT_Y) && cu.ispMode && ((area.topLeft().x - cu.Y().topLeft().x) % PRED_REG_MIN_WIDTH == 0); +} + +void CU::adjustPredArea(CompArea &area) +{ + area.width = std::max<int>(PRED_REG_MIN_WIDTH, area.width); +} + +bool CU::isBcwIdxCoded( const CodingUnit &cu ) { - if( cu.cs->sps->getUseGBi() == false ) + if( cu.cs->sps->getUseBcw() == false ) { - CHECK(cu.GBiIdx != GBI_DEFAULT, "Error: cu.GBiIdx != GBI_DEFAULT"); + CHECK(cu.BcwIdx != BCW_DEFAULT, "Error: cu.BcwIdx != BCW_DEFAULT"); return false; } @@ -4897,7 +3500,7 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu ) return false; } - if( cu.lwidth() * cu.lheight() < GBI_SIZE_CONSTRAINT ) + if( cu.lwidth() * cu.lheight() < BCW_SIZE_CONSTRAINT ) { return false; } @@ -4906,19 +3509,18 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu ) { if( cu.firstPU->interDir == 3 ) { - WPScalingParam *wp0; - WPScalingParam *wp1; - int refIdx0 = 
cu.firstPU->refIdx[REF_PIC_LIST_0]; - int refIdx1 = cu.firstPU->refIdx[REF_PIC_LIST_1]; - - cu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0); - cu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1); - if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag - || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag) - ) - { - return false; - } + WPScalingParam *wp0; + WPScalingParam *wp1; + int refIdx0 = cu.firstPU->refIdx[REF_PIC_LIST_0]; + int refIdx1 = cu.firstPU->refIdx[REF_PIC_LIST_1]; + + cu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0); + cu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1); + if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag + || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)) + { + return false; + } return true; } } @@ -4926,11 +3528,11 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu ) return false; } -uint8_t CU::getValidGbiIdx( const CodingUnit &cu ) +uint8_t CU::getValidBcwIdx( const CodingUnit &cu ) { if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) { - return cu.GBiIdx; + return cu.BcwIdx; } else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N ) { @@ -4938,23 +3540,23 @@ uint8_t CU::getValidGbiIdx( const CodingUnit &cu ) } else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP ) { - CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT "); + CHECK(cu.BcwIdx != BCW_DEFAULT, " cu.BcwIdx != BCW_DEFAULT "); } else { - CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT "); + CHECK(cu.BcwIdx != BCW_DEFAULT, " cu.BcwIdx != BCW_DEFAULT "); } - return GBI_DEFAULT; + return BCW_DEFAULT; } -void CU::setGbiIdx( CodingUnit &cu, uint8_t uh ) +void CU::setBcwIdx( CodingUnit &cu, uint8_t uh ) { int8_t uhCnt = 0; if( cu.firstPU->interDir 
== 3 && !cu.firstPU->mergeFlag ) { - cu.GBiIdx = uh; + cu.BcwIdx = uh; ++uhCnt; } else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N ) @@ -4963,44 +3565,74 @@ void CU::setGbiIdx( CodingUnit &cu, uint8_t uh ) } else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP ) { - cu.GBiIdx = GBI_DEFAULT; + cu.BcwIdx = BCW_DEFAULT; } else { - cu.GBiIdx = GBI_DEFAULT; + cu.BcwIdx = BCW_DEFAULT; } CHECK(uhCnt <= 0, " uhCnt <= 0 "); } -uint8_t CU::deriveGbiIdx( uint8_t gbiLO, uint8_t gbiL1 ) +uint8_t CU::deriveBcwIdx( uint8_t bcwLO, uint8_t bcwL1 ) { - if( gbiLO == gbiL1 ) + if( bcwLO == bcwL1 ) { - return gbiLO; + return bcwLO; } - const int8_t w0 = getGbiWeight(gbiLO, REF_PIC_LIST_0); - const int8_t w1 = getGbiWeight(gbiL1, REF_PIC_LIST_1); - const int8_t th = g_GbiWeightBase >> 1; + const int8_t w0 = getBcwWeight(bcwLO, REF_PIC_LIST_0); + const int8_t w1 = getBcwWeight(bcwL1, REF_PIC_LIST_1); + const int8_t th = g_BcwWeightBase >> 1; const int8_t off = 1; if( w0 == w1 || (w0 < (th - off) && w1 < (th - off)) || (w0 >(th + off) && w1 >(th + off)) ) { - return GBI_DEFAULT; + return BCW_DEFAULT; } else { if( w0 > w1 ) { - return ( w0 >= th ? gbiLO : gbiL1 ); + return ( w0 >= th ? bcwLO : bcwL1 ); } else { - return ( w1 >= th ? gbiL1 : gbiLO ); + return ( w1 >= th ? 
bcwL1 : bcwLO ); } } } +bool CU::bdpcmAllowed( const CodingUnit& cu, const ComponentID compID ) +{ + SizeType transformSkipMaxSize = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize(); + + bool bdpcmAllowed = cu.cs->sps->getBDPCMEnabled(); + bdpcmAllowed &= (isLuma(compID) || cu.cs->sps->getBDPCMEnabled() == BDPCM_LUMACHROMA); + bdpcmAllowed &= CU::isIntra( cu ); + if (isLuma(compID)) + bdpcmAllowed &= (cu.lwidth() <= transformSkipMaxSize && cu.lheight() <= transformSkipMaxSize); + else + bdpcmAllowed &= (cu.chromaSize().width <= transformSkipMaxSize && cu.chromaSize().height <= transformSkipMaxSize); + return bdpcmAllowed; +} + +bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) +{ + SizeType tsMaxSize = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize(); + const int maxSize = CU::isIntra( cu ) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; + const int cuWidth = cu.blocks[0].lumaSize().width; + const int cuHeight = cu.blocks[0].lumaSize().height; + bool mtsAllowed = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y; + + mtsAllowed &= CU::isIntra( cu ) ? 
cu.cs->sps->getUseIntraMTS() : cu.cs->sps->getUseInterMTS() && CU::isInter( cu ); + mtsAllowed &= cuWidth <= maxSize && cuHeight <= maxSize; + mtsAllowed &= !cu.ispMode; + mtsAllowed &= !cu.sbtInfo; + mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); + return mtsAllowed; +} + // TU tools bool TU::isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID) @@ -5015,6 +3647,8 @@ bool TU::getCbf( const TransformUnit &tu, const ComponentID &compID ) bool TU::getCbfAtDepth(const TransformUnit &tu, const ComponentID &compID, const unsigned &depth) { + if( !tu.blocks[compID].valid() ) + CHECK( tu.cbf[compID] != 0, "cbf must be 0 if the component is not available" ); return ((tu.cbf[compID] >> depth) & 1) == 1; } @@ -5028,156 +3662,47 @@ void TU::setCbfAtDepth(TransformUnit &tu, const ComponentID &compID, const unsig bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID) { - bool tsAllowed = compID == COMPONENT_Y; - const int maxSize = tu.cs->pps->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize(); + const int maxSize = tu.cs->pps->getLog2MaxTransformSkipBlockSize(); - tsAllowed &= tu.cs->pps->getUseTransformSkip(); - tsAllowed &= !tu.cu->transQuantBypass; + bool tsAllowed = tu.cs->sps->getTransformSkipEnabledFlag(); tsAllowed &= ( !tu.cu->ispMode || !isLuma(compID) ); - SizeType transformSkipMaxSize = 1 << maxSize; - tsAllowed &= tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize; + tsAllowed &= !(tu.cu->bdpcmMode && isLuma(compID)); + tsAllowed &= !(tu.cu->bdpcmModeChroma && isChroma(compID)); + tsAllowed &= tu.blocks[compID].width <= transformSkipMaxSize && tu.blocks[compID].height <= transformSkipMaxSize; tsAllowed &= !tu.cu->sbtInfo; return tsAllowed; } -bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID) -{ - bool mtsAllowed = compID == COMPONENT_Y; - const int maxSize = CU::isIntra( *tu.cu ) ? 
MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; - - mtsAllowed &= CU::isIntra( *tu.cu ) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter( *tu.cu ); - mtsAllowed &= ( tu.lwidth() <= maxSize && tu.lheight() <= maxSize ); - mtsAllowed &= !tu.cu->ispMode; - mtsAllowed &= !tu.cu->sbtInfo; - return mtsAllowed; -} - -uint32_t TU::getGolombRiceStatisticsIndex(const TransformUnit &tu, const ComponentID &compID) -{ - const bool transformSkip = tu.mtsIdx==1; - const bool transquantBypass = tu.cu->transQuantBypass; - - //-------- - - const uint32_t channelTypeOffset = isChroma(compID) ? 2 : 0; - const uint32_t nonTransformedOffset = (transformSkip || transquantBypass) ? 1 : 0; - - //-------- - - const uint32_t selectedIndex = channelTypeOffset + nonTransformedOffset; - CHECK( selectedIndex >= RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS, "Invalid golomb rice adaptation statistics set" ); - return selectedIndex; -} - -#if HEVC_USE_MDCS -uint32_t TU::getCoefScanIdx(const TransformUnit &tu, const ComponentID &compID) +int TU::getICTMode( const TransformUnit& tu, int jointCbCr ) { - //------------------------------------------------ - - //this mechanism is available for intra only - - if( !CU::isIntra( *tu.cu ) ) + if( jointCbCr < 0 ) { - return SCAN_DIAG; - } - - //------------------------------------------------ - - //check that MDCS can be used for this TU - - - const CompArea &area = tu.blocks[compID]; - const SPS &sps = *tu.cs->sps; - const ChromaFormat format = sps.getChromaFormatIdc(); - - - const uint32_t maximumWidth = MDCS_MAXIMUM_WIDTH >> getComponentScaleX(compID, format); - const uint32_t maximumHeight = MDCS_MAXIMUM_HEIGHT >> getComponentScaleY(compID, format); - - if ((area.width > maximumWidth) || (area.height > maximumHeight)) - { - return SCAN_DIAG; - } - - //------------------------------------------------ - - //otherwise, select the appropriate mode - - const PredictionUnit &pu = *tu.cs->getPU( area.pos(), toChannelType( compID ) ); - 
- uint32_t uiDirMode = PU::getFinalIntraMode(pu, toChannelType(compID)); - - //------------------ - - if (abs((int) uiDirMode - VER_IDX) <= MDCS_ANGLE_LIMIT) - { - return SCAN_HOR; - } - else if (abs((int) uiDirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT) - { - return SCAN_VER; - } - else - { - return SCAN_DIAG; + jointCbCr = tu.jointCbCr; } + return g_ictModes[ tu.cs->picHeader->getJointCbCrSignFlag() ][ jointCbCr ]; } -#endif bool TU::hasCrossCompPredInfo( const TransformUnit &tu, const ComponentID &compID ) { return (isChroma(compID) && tu.cs->pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf(tu, COMPONENT_Y) && (!CU::isIntra(*tu.cu) || PU::isChromaIntraModeCrossCheckMode(*tu.cs->getPU(tu.blocks[compID].pos(), toChannelType(compID))))); } -uint32_t TU::getNumNonZeroCoeffsNonTS( const TransformUnit& tu, const bool bLuma, const bool bChroma ) -{ - uint32_t count = 0; - for( uint32_t i = 0; i < ::getNumberValidTBlocks( *tu.cs->pcv ); i++ ) - { - if( tu.blocks[i].valid() && ( isLuma(ComponentID(i)) ? 
tu.mtsIdx !=1 : true ) && TU::getCbf( tu, ComponentID( i ) ) ) - { - if( isLuma ( tu.blocks[i].compID ) && !bLuma ) continue; - if( isChroma( tu.blocks[i].compID ) && !bChroma ) continue; - - uint32_t area = tu.blocks[i].area(); - const TCoeff* coeff = tu.getCoeffs( ComponentID( i ) ).buf; - for( uint32_t j = 0; j < area; j++ ) - { - count += coeff[j] != 0; - } - } - } - return count; -} bool TU::needsSqrt2Scale( const TransformUnit &tu, const ComponentID &compID ) { const Size &size=tu.blocks[compID]; - const bool isTransformSkip = tu.mtsIdx==1 && isLuma(compID); - return (!isTransformSkip) && (((g_aucLog2[size.width] + g_aucLog2[size.height]) & 1) == 1); + const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP); + return (!isTransformSkip) && (((floorLog2(size.width) + floorLog2(size.height)) & 1) == 1); } -#if HM_QTBT_AS_IN_JEM_QUANT - bool TU::needsBlockSizeTrafoScale( const TransformUnit &tu, const ComponentID &compID ) { return needsSqrt2Scale( tu, compID ) || isNonLog2BlockSize( tu.blocks[compID] ); } -#else -bool TU::needsQP3Offset(const TransformUnit &tu, const ComponentID &compID) -{ - if( !tu.transformSkip[compID] ) - { - return ( ( ( g_aucLog2[tu.blocks[compID].width] + g_aucLog2[tu.blocks[compID].height] ) & 1 ) == 1 ); - } - return false; -} -#endif - TransformUnit* TU::getPrevTU( const TransformUnit &tu, const ComponentID compID ) { @@ -5197,44 +3722,50 @@ bool TU::getPrevTuCbfAtDepth( const TransformUnit ¤tTu, const ComponentID return ( prevTU != nullptr ) ? 
TU::getCbfAtDepth( *prevTU, compID, trDepth ) : false; } -void TU::getTransformTypeISP( const TransformUnit &tu, const ComponentID compID, int &typeH, int &typeV ) + +// other tools + +uint32_t getCtuAddr( const Position& pos, const PreCalcValues& pcv ) { - typeH = DCT2, typeV = DCT2; - const int uiChFinalMode = PU::getFinalIntraMode( *tu.cu->firstPU, toChannelType( compID ) ); - bool intraModeIsEven = uiChFinalMode % 2 == 0; + return ( pos.x >> pcv.maxCUWidthLog2 ) + ( pos.y >> pcv.maxCUHeightLog2 ) * pcv.widthInCtus; +} - if( uiChFinalMode == DC_IDX || uiChFinalMode == 33 || uiChFinalMode == 35 ) +int getNumModesMip(const Size& block) +{ + switch( getMipSizeId(block) ) { - typeH = DCT2; - typeV = typeH; + case 0: return 16; + case 1: return 8; + case 2: return 6; + default: THROW( "Invalid mipSizeId" ); } - else if( uiChFinalMode == PLANAR_IDX || ( uiChFinalMode >= 31 && uiChFinalMode <= 37 ) ) +} + + +int getMipSizeId(const Size& block) +{ + if( block.width == 4 && block.height == 4 ) { - typeH = DST7; - typeV = typeH; + return 0; } - else if( ( intraModeIsEven && uiChFinalMode >= 2 && uiChFinalMode <= 30 ) || ( !intraModeIsEven && uiChFinalMode >= 39 && uiChFinalMode <= 65 ) ) + else if( block.width == 4 || block.height == 4 || (block.width == 8 && block.height == 8) ) { - typeH = DST7; - typeV = DCT2; + return 1; } - else if( ( !intraModeIsEven && uiChFinalMode >= 3 && uiChFinalMode <= 29 ) || ( intraModeIsEven && uiChFinalMode >= 38 && uiChFinalMode <= 66 ) ) + else { - typeH = DCT2; - typeV = DST7; + return 2; } - //Size restriction for non-DCT-II transforms - Area tuArea = tu.blocks[compID]; - typeH = tuArea.width <= 2 || tuArea.width >= 32 ? DCT2 : typeH; - typeV = tuArea.height <= 2 || tuArea.height >= 32 ? 
DCT2 : typeV; -} - -// other tools +} -uint32_t getCtuAddr( const Position& pos, const PreCalcValues& pcv ) +bool allowLfnstWithMip(const Size& block) { - return ( pos.x >> pcv.maxCUWidthLog2 ) + ( pos.y >> pcv.maxCUHeightLog2 ) * pcv.widthInCtus; + if (block.width >= 16 && block.height >= 16) + { + return true; + } + return false; } diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 344bac8fb20b2b0d64a461b0c6841c002166143d..8cead3c5ca37492e67158b262460de667902203f 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,42 +59,48 @@ namespace CU bool isIntra (const CodingUnit &cu); bool isInter (const CodingUnit &cu); bool isIBC (const CodingUnit &cu); + bool isPLT (const CodingUnit &cu); bool isRDPCMEnabled (const CodingUnit &cu); - bool isLosslessCoded (const CodingUnit &cu); - uint32_t getIntraSizeIdx (const CodingUnit &cu); bool isSameCtu (const CodingUnit &cu, const CodingUnit &cu2); bool isSameSlice (const CodingUnit &cu, const CodingUnit &cu2); -#if HEVC_TILES_WPP bool isSameTile (const CodingUnit &cu, const CodingUnit &cu2); bool isSameSliceAndTile (const CodingUnit &cu, const CodingUnit &cu2); -#endif bool isLastSubCUOfCtu (const CodingUnit &cu); uint32_t getCtuAddr (const CodingUnit &cu); - int predictQP (const CodingUnit& cu, const int prevQP ); uint32_t getNumPUs (const CodingUnit& cu); void addPUs ( CodingUnit& cu); + void saveMotionInHMVP (const CodingUnit& cu, const bool isToBeDone ); + PartSplit getSplitAtDepth (const CodingUnit& cu, const unsigned depth); + ModeType getModeTypeAtDepth (const CodingUnit& cu, const unsigned depth); - bool hasNonTsCodedBlock (const CodingUnit& cu); - 
uint32_t getNumNonZeroCoeffNonTs (const CodingUnit& cu); + uint32_t getNumNonZeroCoeffNonTsCorner8x8( const CodingUnit& cu, const bool lumaFlag = true, const bool chromaFlag = true ); + bool isPredRegDiffFromTB(const CodingUnit& cu, const ComponentID compID); + bool isFirstTBInPredReg(const CodingUnit& cu, const ComponentID compID, const CompArea &area); + bool isMinWidthPredEnabledForBlkSize(const int w, const int h); + void adjustPredArea(CompArea &area); + bool isBcwIdxCoded (const CodingUnit& cu); + uint8_t getValidBcwIdx (const CodingUnit& cu); + void setBcwIdx (CodingUnit& cu, uint8_t uh); + uint8_t deriveBcwIdx (uint8_t bcwLO, uint8_t bcwL1); + bool bdpcmAllowed (const CodingUnit& cu, const ComponentID compID); + bool isMTSAllowed (const CodingUnit& cu, const ComponentID compID); - bool isGBiIdxCoded (const CodingUnit& cu); - uint8_t getValidGbiIdx (const CodingUnit& cu); - void setGbiIdx (CodingUnit& cu, uint8_t uh); - uint8_t deriveGbiIdx (uint8_t gbiLO, uint8_t gbiL1); bool divideTuInRows ( const CodingUnit &cu ); - bool firstTestISPHorSplit ( const int width, const int height, const ComponentID compID, const CodingUnit *cuLeft = nullptr, const CodingUnit *cuAbove = nullptr ); PartSplit getISPType ( const CodingUnit &cu, const ComponentID compID ); bool isISPLast ( const CodingUnit &cu, const CompArea &tuArea, const ComponentID compID ); bool isISPFirst ( const CodingUnit &cu, const CompArea &tuArea, const ComponentID compID ); - ISPType canUseISPSplit ( const CodingUnit &cu, const ComponentID compID ); - ISPType canUseISPSplit ( const int width, const int height, const int maxTrSize = MAX_TB_SIZEY ); + bool canUseISP ( const CodingUnit &cu, const ComponentID compID ); + bool canUseISP ( const int width, const int height, const int maxTrSize = MAX_TB_SIZEY ); + bool canUseLfnstWithISP ( const CompArea& cuArea, const ISPType ispSplitType ); + bool canUseLfnstWithISP ( const CodingUnit& cu, const ChannelType chType ); uint32_t getISPSplitDim ( const int 
width, const int height, const PartSplit ispType ); + bool allLumaCBFsAreZero ( const CodingUnit& cu ); PUTraverser traversePUs ( CodingUnit& cu); TUTraverser traverseTUs ( CodingUnit& cu); @@ -103,8 +109,6 @@ namespace CU bool hasSubCUNonZeroMVd (const CodingUnit& cu); bool hasSubCUNonZeroAffineMVd ( const CodingUnit& cu ); - int getMaxNeighboriMVCandNum (const CodingStructure& cs, const Position& pos); - void resetMVDandMV2Int ( CodingUnit& cu, InterPrediction *interPred ); uint8_t getSbtInfo (uint8_t idx, uint8_t pos); uint8_t getSbtIdx (const uint8_t sbtInfo); @@ -114,17 +118,21 @@ namespace CU uint8_t getSbtPosFromSbtMode (const uint8_t sbtMode); uint8_t targetSbtAllowed (uint8_t idx, uint8_t sbtAllowed); uint8_t numSbtModeRdo (uint8_t sbtAllowed); - bool isMtsMode (const uint8_t sbtInfo); bool isSbtMode (const uint8_t sbtInfo); bool isSameSbtSize (const uint8_t sbtInfo1, const uint8_t sbtInfo2); + bool getRprScaling ( const SPS* sps, const PPS* curPPS, Picture* refPic, int& xScale, int& yScale ); } // PU tools namespace PU { - int getLMSymbolList(const PredictionUnit &pu, int *pModeList); + int getLMSymbolList(const PredictionUnit &pu, int *modeList); int getIntraMPMs(const PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA); + bool isMIP (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA); + uint32_t getIntraDirLuma (const PredictionUnit &pu); void getIntraChromaCandModes (const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]); uint32_t getFinalIntraMode (const PredictionUnit &pu, const ChannelType &chType); + uint32_t getCoLocatedIntraLumaMode (const PredictionUnit &pu); + int getWideAngIntraMode ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ); void getInterMergeCandidates (const PredictionUnit &pu, MergeCtx& mrgCtx, int mmvdList, const int& mrgCandIdx = -1 ); @@ -132,92 +140,70 @@ namespace PU void getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& 
mrgCtx, const int& mrgCandIdx = -1); int getDistScaleFactor(const int &currPOC, const int &currRefPOC, const int &colPOC, const int &colRefPOC); bool isDiffMER (const PredictionUnit &pu, const PredictionUnit &pu2); - bool getColocatedMVP (const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &pos, Mv& rcMv, const int &refIdx); + bool getColocatedMVP (const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &pos, Mv& rcMv, const int &refIdx, bool sbFlag); void fillMvpCand ( PredictionUnit &pu, const RefPicList &eRefPicList, const int &refIdx, AMVPInfo &amvpInfo ); void fillIBCMvpCand (PredictionUnit &pu, AMVPInfo &amvpInfo); - bool addIBCMVPCand (const PredictionUnit &pu, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo); void fillAffineMvpCand ( PredictionUnit &pu, const RefPicList &eRefPicList, const int &refIdx, AffineAMVPInfo &affiAMVPInfo); bool addMVPCandUnscaled (const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo); - bool addMVPCandWithScaling (const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo); void xInheritedAffineMv ( const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3] ); - bool xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const MergeCtx mergeCandList, bool hasPruned[MRG_MAX_NUM_CANDS]); -#if JVET_L0090_PAIR_AVG - bool addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos - , bool ibcFlag - , bool isShared - ); -#else - bool addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool isCandInter[MRG_MAX_NUM_CANDS], bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, 
bool isAvailableSubPu, unsigned subPuMvpPos + bool addMergeHMVPCand (const CodingStructure &cs, MergeCtx& mrgCtx, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt + , const bool isAvailableA1, const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove + , const bool ibcFlag + , const bool isGt4x4 ); -#endif - void addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const RefPicList eRefPicList2nd, const int currRefPOC, AMVPInfo &info, uint8_t imv); - bool addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &refPicList, const int &refIdx, const Position &pos, const MvpDir &dir, AffineAMVPInfo &affiAmvpInfo ); + void addAMVPHMVPCand (const PredictionUnit &pu, const RefPicList eRefPicList, const int currRefPOC, AMVPInfo &info); + bool addAffineMVPCandUnscaled ( const PredictionUnit &pu, const RefPicList &refPicList, const int &refIdx, const Position &pos, const MvpDir &dir, AffineAMVPInfo &affiAmvpInfo ); bool isBipredRestriction (const PredictionUnit &pu); void spanMotionInfo ( PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() ); void applyImv ( PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL ); - void getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int modelIdx, int verNum, AffineMergeCtx& affMrgCtx ); + void getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgCtx); void getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx, const int mrgCandIdx = -1 ); void setAllAffineMvField ( PredictionUnit &pu, MvField *mvField, RefPicList eRefList ); - void setAllAffineMv ( PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList - , bool setHighPrec = false - ); - bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count - , 
int mmvdList - ); + void setAllAffineMv ( PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool clipCPMVs = false ); + bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count, int mmvdList); bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count); - bool isBiPredFromDifferentDir (const PredictionUnit &pu); bool isBiPredFromDifferentDirEqDistPoc(const PredictionUnit &pu); - void restrictBiPredMergeCands (const PredictionUnit &pu, MergeCtx& mrgCtx); void restrictBiPredMergeCandsOne (PredictionUnit &pu); bool isLMCMode ( unsigned mode); bool isLMCModeEnabled (const PredictionUnit &pu, unsigned mode); bool isChromaIntraModeCrossCheckMode(const PredictionUnit &pu); - int getMHIntraMPMs (const PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA, const bool isChromaMDMS = false, const unsigned startIdx = 0); - int getNarrowShape (const int width, const int height); void getTriangleMergeCandidates (const PredictionUnit &pu, MergeCtx &triangleMrgCtx); - bool isUniqueTriangleCandidates (const PredictionUnit &pu, MergeCtx &triangleMrgCtx); void spanTriangleMotionInfo ( PredictionUnit &pu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1); int32_t mappingRefPic (const PredictionUnit &pu, int32_t refPicPoc, bool targetRefPicList); - void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred); + bool isAddNeighborMv (const Mv& currMv, Mv* neighborMvs, int numNeighborMv); + void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred); bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv); - bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize); bool checkDMVRCondition(const PredictionUnit& pu); + } // TU tools namespace TU { - uint32_t 
getNumNonZeroCoeffsNonTS (const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true); + uint32_t getNumNonZeroCoeffsNonTSCorner8x8( const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true ); bool isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID); bool getCbf (const TransformUnit &tu, const ComponentID &compID); bool getCbfAtDepth (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth); void setCbfAtDepth ( TransformUnit &tu, const ComponentID &compID, const unsigned &depth, const bool &cbf); bool isTSAllowed (const TransformUnit &tu, const ComponentID compID); - bool isMTSAllowed (const TransformUnit &tu, const ComponentID compID); - uint32_t getGolombRiceStatisticsIndex (const TransformUnit &tu, const ComponentID &compID); -#if HEVC_USE_MDCS - uint32_t getCoefScanIdx (const TransformUnit &tu, const ComponentID &compID); -#endif bool hasCrossCompPredInfo (const TransformUnit &tu, const ComponentID &compID); bool needsSqrt2Scale ( const TransformUnit &tu, const ComponentID &compID ); -#if HM_QTBT_AS_IN_JEM_QUANT bool needsBlockSizeTrafoScale ( const TransformUnit &tu, const ComponentID &compID ); -#else - bool needsQP3Offset (const TransformUnit &tu, const ComponentID &compID); -#endif TransformUnit* getPrevTU ( const TransformUnit &tu, const ComponentID compID ); bool getPrevTuCbfAtDepth( const TransformUnit &tu, const ComponentID compID, const int trDepth ); - void getTransformTypeISP( const TransformUnit &tu, const ComponentID compID, int &typeH, int &typeV ); + int getICTMode ( const TransformUnit &tu, int jointCbCr = -1 ); } uint32_t getCtuAddr (const Position& pos, const PreCalcValues &pcv); +int getNumModesMip (const Size& block); +int getMipSizeId (const Size& block); +bool allowLfnstWithMip(const Size& block); template<typename T, size_t N> uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList - , 
static_vector<int, N>& extendRefList, int extendRef , size_t uiFastCandNum = N, int* iserttPos = nullptr) { CHECK( std::min( uiFastCandNum, candModeList.size() ) != std::min( uiFastCandNum, candCostList.size() ), "Sizes do not match!" ); @@ -238,17 +224,9 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi { candModeList[currSize - i] = candModeList[currSize - 1 - i]; candCostList[currSize - i] = candCostList[currSize - 1 - i]; - if (extendRef != -1) - { - extendRefList[currSize - i] = extendRefList[currSize - 1 - i]; - } } candModeList[currSize - shift] = uiMode; candCostList[currSize - shift] = uiCost; - if (extendRef != -1) - { - extendRefList[currSize - shift] = extendRef; - } if (iserttPos != nullptr) { *iserttPos = int(currSize - shift); @@ -259,10 +237,6 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi { candModeList.insert( candModeList.end() - shift, uiMode ); candCostList.insert( candCostList.end() - shift, uiCost ); - if (extendRef != -1) - { - extendRefList.insert(extendRefList.end() - shift, extendRef); - } if (iserttPos != nullptr) { *iserttPos = int(candModeList.size() - shift - 1); @@ -276,56 +250,4 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi return 0; } -template<typename T, size_t N> -uint32_t updateDoubleCandList(T mode, double cost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, static_vector<T, N>& candModeList2, T mode2, size_t fastCandNum = N, int* iserttPos = nullptr) -{ - CHECK(std::min(fastCandNum, candModeList.size()) != std::min(fastCandNum, candCostList.size()), "Sizes do not match!"); - CHECK(fastCandNum > candModeList.capacity(), "The vector is to small to hold all the candidates!"); - - size_t i; - size_t shift = 0; - size_t currSize = std::min(fastCandNum, candCostList.size()); - - while (shift < fastCandNum && shift < currSize && cost < candCostList[currSize - 1 - shift]) - { - shift++; - } - - if 
(candModeList.size() >= fastCandNum && shift != 0) - { - for (i = 1; i < shift; i++) - { - candModeList[currSize - i] = candModeList[currSize - 1 - i]; - candModeList2[currSize - i] = candModeList2[currSize - 1 - i]; - candCostList[currSize - i] = candCostList[currSize - 1 - i]; - } - candModeList[currSize - shift] = mode; - candModeList2[currSize - shift] = mode2; - candCostList[currSize - shift] = cost; - if (iserttPos != nullptr) - { - *iserttPos = int(currSize - shift); - } - return 1; - } - else if (currSize < fastCandNum) - { - candModeList.insert(candModeList.end() - shift, mode); - candModeList2.insert(candModeList2.end() - shift, mode2); - candCostList.insert(candCostList.end() - shift, cost); - if (iserttPos != nullptr) - { - *iserttPos = int(candModeList.size() - shift - 1); - } - return 1; - } - - if (iserttPos != nullptr) - { - *iserttPos = -1; - } - return 0; -} - - #endif diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp index 8495736e89b85b9ef48ec2111027fd8a74986bcb..cf20eb20902d4fb6f4f00478b81d5dc983f77eb2 100644 --- a/source/Lib/CommonLib/WeightPrediction.cpp +++ b/source/Lib/CommonLib/WeightPrediction.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -161,13 +161,19 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf &pcYuvSrc0, const WPScalingParam *const wp1, PelUnitBuf &rpcYuvDst, const bool bRoundLuma /*= true*/, - const ComponentID maxNumComp) + const ComponentID maxNumComp + , bool lumaOnly + , bool chromaOnly +) { const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true }; const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size(); - for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++) + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + int firstComponent = chromaOnly ? 1 : 0; + int lastComponent = lumaOnly ? 0 : maxNumComp; + for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; componentIndex++) { const ComponentID compID = ComponentID(componentIndex); @@ -215,15 +221,78 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf &pcYuvSrc0, } // compID loop } +void WeightPrediction::addWeightBiComponent(const CPelUnitBuf &pcYuvSrc0, + const CPelUnitBuf &pcYuvSrc1, + const ClpRngs &clpRngs, + const WPScalingParam *const wp0, + const WPScalingParam *const wp1, + PelUnitBuf &rpcYuvDst, + const bool bRoundLuma /*= true*/, + const ComponentID Comp) +{ + const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true }; + + const ComponentID compID = ComponentID(Comp); + + const Pel* src0 = pcYuvSrc0.bufs[compID].buf; + const Pel* src1 = pcYuvSrc1.bufs[compID].buf; + Pel* dst = rpcYuvDst.bufs[compID].buf; + + // Luma : -------------------------------------------- + const ClpRng& clpRng = clpRngs.comp[compID]; + const int w0 = wp0[compID].w; + const int offset = wp0[compID].offset; + const int clipBD = clpRng.bd; + const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipBD)); + const int shift = wp0[compID].shift + shiftNum; + 
const int round = (enableRounding[compID] && (shift > 0)) ? (1 << (shift - 1)) : 0; + const int w1 = wp1[compID].w; + const int height = rpcYuvDst.bufs[compID].height; + const int width = rpcYuvDst.bufs[compID].width; + + const uint32_t src0Stride = pcYuvSrc0.bufs[compID].stride; + const uint32_t src1Stride = pcYuvSrc1.bufs[compID].stride; + const uint32_t dstStride = rpcYuvDst.bufs[compID].stride; + + for (int y = height - 1; y >= 0; y--) + { + // do it in batches of 4 (partial unroll) + int x = width - 1; + + for (; x >= 3; ) + { + dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--; + dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--; + dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--; + dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--; + } + for (; x >= 0; x--) + { + dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); + } + + src0 += src0Stride; + src1 += src1Stride; + dst += dstStride; + } // y loop +} + void WeightPrediction::addWeightUni(const CPelUnitBuf &pcYuvSrc0, const ClpRngs &clpRngs, const WPScalingParam *const wp0, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp) + const ComponentID maxNumComp + , bool lumaOnly + , bool chromaOnly +) { const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size(); - for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++) + CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" ); + int firstComponent = chromaOnly ? 1 : 0; + int lastComponent = lumaOnly ? 
0 : maxNumComp; + for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; + componentIndex++) { const ComponentID compID = ComponentID(componentIndex); @@ -315,7 +384,10 @@ void WeightPrediction::xWeightedPredictionUni(const PredictionUnit &pu, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const int iRefIdx_input/* = -1*/, - const ComponentID maxNumComp) + const ComponentID maxNumComp + , bool lumaOnly + , bool chromaOnly +) { WPScalingParam *pwp, *pwpTmp; @@ -335,14 +407,17 @@ void WeightPrediction::xWeightedPredictionUni(const PredictionUnit &pu, { getWpScaling(pu.cs->slice, -1, iRefIdx, pwpTmp, pwp, maxNumComp); } - addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp); + addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp, lumaOnly, chromaOnly); } void WeightPrediction::xWeightedPredictionBi(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp) + const ComponentID maxNumComp + , bool lumaOnly + , bool chromaOnly +) { const int iRefIdx0 = pu.refIdx[0]; const int iRefIdx1 = pu.refIdx[1]; @@ -351,19 +426,21 @@ void WeightPrediction::xWeightedPredictionBi(const PredictionUnit &pu, CHECK( !pu.cs->pps->getWPBiPred(), "Weighted Bi-prediction disabled" ); + if (iRefIdx0 < 0 && iRefIdx1 < 0) return; + getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1, maxNumComp); if (iRefIdx0 >= 0 && iRefIdx1 >= 0) { - addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp); + addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp, lumaOnly, chromaOnly); } else if (iRefIdx0 >= 0 && iRefIdx1 < 0) { - addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp); + addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly); } else if (iRefIdx0 
< 0 && iRefIdx1 >= 0) { - addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp); + addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly); } else { diff --git a/source/Lib/CommonLib/WeightPrediction.h b/source/Lib/CommonLib/WeightPrediction.h index 2cbb82fa608e06ad25ff95a80fbc822a7aa16c37..4cc91597c8251dda3cffd509d6b87a0cdc36ac4e 100644 --- a/source/Lib/CommonLib/WeightPrediction.h +++ b/source/Lib/CommonLib/WeightPrediction.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -69,26 +69,47 @@ public: const WPScalingParam *const wp1, PelUnitBuf &rpcYuvDst, const bool bRoundLuma = true, - const ComponentID maxNumComp = MAX_NUM_COMPONENT ); + const ComponentID maxNumComp = MAX_NUM_COMPONENT + , bool lumaOnly = false + , bool chromaOnly = false + ); + + void addWeightBiComponent( const CPelUnitBuf &pcYuvSrc0, + const CPelUnitBuf &pcYuvSrc1, + const ClpRngs &clpRngs, + const WPScalingParam *const wp0, + const WPScalingParam *const wp1, + PelUnitBuf &rpcYuvDst, + const bool bRoundLuma = true, + const ComponentID Comp = COMPONENT_Y); void addWeightUni( const CPelUnitBuf &pcYuvSrc0, const ClpRngs &clpRngs, const WPScalingParam *const wp0, PelUnitBuf &rpcYuvDst, - const ComponentID maxNumComp = MAX_NUM_COMPONENT); + const ComponentID maxNumComp = MAX_NUM_COMPONENT + , bool lumaOnly = false + , bool chromaOnly = false + ); void xWeightedPredictionUni( const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const int iRefIdx=-1, - const ComponentID maxNumComp = MAX_NUM_COMPONENT); + const ComponentID maxNumComp = MAX_NUM_COMPONENT + , bool lumaOnly = false + , bool chromaOnly = false + ); void 
xWeightedPredictionBi( const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, PelUnitBuf &pcYuvDst, - const ComponentID maxNumComp = MAX_NUM_COMPONENT ); + const ComponentID maxNumComp = MAX_NUM_COMPONENT + , bool lumaOnly = false + , bool chromaOnly = false + ); }; #endif diff --git a/source/Lib/CommonLib/dtrace.cpp b/source/Lib/CommonLib/dtrace.cpp index 503f53eec6313b222fbbbf6fd15d851ca8d01993..3500c6be69f4c885964cbec1cd10bc855144fa1b 100644 --- a/source/Lib/CommonLib/dtrace.cpp +++ b/source/Lib/CommonLib/dtrace.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/dtrace.h b/source/Lib/CommonLib/dtrace.h index b57efbd22dfa02edc57a02a5fbf3966ec7bd7b34..9e10e201b70042294d934eb4bf4bd44ae78d8dae 100644 --- a/source/Lib/CommonLib/dtrace.h +++ b/source/Lib/CommonLib/dtrace.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp index c78720917af4ffb15c340e446a649ac0c5d881a7..8005a537f23095a2ca901a3d95414f68ecd3dccc 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp +++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -277,8 +277,8 @@ void retrieveTriangularMvInfo(const PredictionUnit& pu, MotionInfo& mi0, MotionI CMotionBuf mb = pu.getMotionBuf(); bool foundMv[2] = { false, false }; bool foundBi = false; - int32_t idxW = (int32_t)(g_aucLog2[pu.lwidth() ] - MIN_CU_LOG2); - int32_t idxH = (int32_t)(g_aucLog2[pu.lheight()] - MIN_CU_LOG2); + int32_t idxW = (int32_t)(floorLog2(pu.lwidth() ) - MIN_CU_LOG2); + int32_t idxH = (int32_t)(floorLog2(pu.lheight()) - MIN_CU_LOG2); for (int32_t y = 0; y < mb.height; y++) { for (int32_t x = 0; x < mb.width; x++) @@ -376,7 +376,7 @@ void writeBlockStatisticsHeader(const SPS *sps) DTRACE_HEADER( g_trace_ctx, "# VTMBMS Block Statistics\n"); // sequence info - DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples()); + DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getMaxPicWidthInLumaSamples(), sps->getMaxPicHeightInLumaSamples() ); // list statistics for( auto i = static_cast<int>(BlockStatistic::PredMode); i < static_cast<int>(BlockStatistic::NumBlockStatistics); i++) { @@ -426,17 +426,17 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries); - if (cs.pps->getTransquantBypassEnabledFlag()) - { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass); - } - // skip flag if (!cs.slice->isIntra()) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip); } + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCM), cu.bdpcmMode); + 
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCMChroma), cu.bdpcmModeChroma); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TileIdx), cu.tileIdx); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IndependentSliceIdx), cu.slice->getIndependentSliceIdx()); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::LFNSTIdx), cu.lfnstIdx); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MMVDSkipFlag), cu.mmvdSkip); } else if( chType == CHANNEL_TYPE_CHROMA ) @@ -449,11 +449,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries); - if (cs.pps->getTransquantBypassEnabledFlag()) - { - DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass); - } - + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCMChroma), cu.bdpcmModeChroma); } @@ -467,6 +463,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag); } + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RegularMergeFlag), pu.regularMergeFlag); if( pu.mergeFlag ) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeIdx), pu.mergeIdx); @@ -476,12 +473,11 @@ void writeAllData(const 
CodingStructure& cs, const UnitArea& ctuArea) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MMVDMergeIdx), pu.mmvdMergeIdx); } - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MHIntraFlag), pu.mhIntraFlag); - if (pu.mhIntraFlag) + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::CiipFlag), pu.ciipFlag); + if (pu.ciipFlag) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), pu.intraDir[COMPONENT_Y]); } - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::TriangleFlag), pu.cu->triangle); } DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); @@ -657,22 +653,20 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) } + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SMVDFlag), cu.smvdMode); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf); - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::GBIIndex), cu.GBiIdx); - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IBCFlag), cu.predMode == MODE_IBC); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BCWIndex), cu.BcwIdx); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SbtIdx), cu.getSbtIdx()); + 
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SbtPos), cu.getSbtPos()); } break; case MODE_INTRA: { - if(chType == CHANNEL_TYPE_LUMA) { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm); - } - else if(chType == CHANNEL_TYPE_CHROMA) - { - DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MIPFlag), cu.mipFlag); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ISPMode), cu.ispMode); } const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat ); @@ -693,9 +687,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) { const uint32_t uiChFinalMode = PU::getFinalIntraMode( pu, ChannelType( chType ) ); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), uiChFinalMode); -#if ENABLE_CHROMA_422 assert(0); -#endif } } } @@ -712,12 +704,32 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) if (tu.Y().valid()) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]); - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx), tu.mtsIdx); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Y), tu.mtsIdx[COMPONENT_Y]); } - if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA))) + if ( tu.Cb().valid() ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::JointCbCr), tu.jointCbCr); + } + + bool lumaOnly = ( cu.chromaFormat == CHROMA_400 || 
!tu.blocks[COMPONENT_Cb].valid() ); + if( !lumaOnly ) + { + if( TU::hasCrossCompPredInfo( tu, COMPONENT_Cb ) ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::CompAlphaCb), tu.compAlpha[COMPONENT_Cb] ); + } + if( TU::hasCrossCompPredInfo( tu, COMPONENT_Cr ) ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::CompAlphaCr), tu.compAlpha[COMPONENT_Cr] ); + } + } + + if( !(cu.chromaFormat == CHROMA_400 || (cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA)) ) { DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cb), tu.mtsIdx[COMPONENT_Cb]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cr), tu.mtsIdx[COMPONENT_Cr]); } } } @@ -733,7 +745,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) for (int ch = 0; ch < maxNumChannelType; ch++) { const ChannelType chType = ChannelType(ch); - const SPS& sps = *cs.sps; for (const CodingUnit &cu : cs.traverseCUs(CS::getArea(cs, ctuArea, chType), chType)) { @@ -746,11 +757,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp); DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries); - // transquant bypass flag - if 
(cs.pps->getTransquantBypassEnabledFlag()) - { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass); - } // skip flag if (!cs.slice->isIntra() && cu.Y().valid()) { @@ -764,13 +770,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) // prediction mode and partitioning data DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode); - if (CU::isIntra(cu)) - { - if (!(!sps.getPCMEnabledFlag() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize()))) - { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm); - } - } } else if (chType == CHANNEL_TYPE_CHROMA ) { @@ -781,19 +780,7 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries); - // transquant bypass flag - if (cs.pps->getTransquantBypassEnabledFlag()) - { - DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass); - } - if (CU::isIntra(cu)) - { - if (!(!sps.getPCMEnabledFlag() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize()))) - { - DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm); - } - } } for (const PredictionUnit &pu : 
CU::traversePUs(cu)) @@ -806,7 +793,7 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), PU::getFinalIntraMode(pu, ChannelType(chType))); } - if (!(pu.chromaFormat == CHROMA_400 || (CS::isDualITree(*pu.cs) && pu.chType == CHANNEL_TYPE_LUMA))) + if (!(pu.chromaFormat == CHROMA_400 || (pu.cu->isSepTree() && pu.chType == CHANNEL_TYPE_LUMA))) { DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA)); } @@ -837,12 +824,12 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine); } - if (pu.cs->sps->getUseMHIntra() && !pu.cu->skip && !pu.cu->affine && !(pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE) + if (pu.cs->sps->getUseCiip() && !pu.cu->skip && !pu.cu->affine && !(pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE) && !pu.mmvdMergeFlag ) { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MHIntraFlag), pu.mhIntraFlag); - if (pu.mhIntraFlag) + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::CiipFlag), pu.ciipFlag); + if (pu.ciipFlag) { if (cu.Y().valid()) { @@ -851,11 +838,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) } } } - if (cu.cs->slice->getSPS()->getUseTriangle() && cu.cs->slice->isInterB() && cu.lwidth() * cu.lheight() >= TRIANGLE_MIN_SIZE && !cu.affine) - { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TriangleFlag), cu.triangle); 
- - } } else { @@ -967,9 +949,9 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv); } - if (CU::isGBiIdxCoded(cu)) + if (CU::isBcwIdxCoded(cu)) { - DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::GBIIndex), cu.GBiIdx); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BCWIndex), cu.BcwIdx); } break; } @@ -995,12 +977,14 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) if (tu.Y().valid()) { DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]); - DTRACE_BLOCK_SCALAR( g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName( BlockStatistic::MTSIdx ), tu.mtsIdx ); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Y), tu.mtsIdx[COMPONENT_Y]); } - if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA))) + if (!(cu.chromaFormat == CHROMA_400 || (cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA))) { DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]); DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cb), tu.mtsIdx[COMPONENT_Cb]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cr), tu.mtsIdx[COMPONENT_Cr]); } } } diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h index 
56b5adec63f37049fcc7bf518083f0b0c2e40fbf..2df8b9b3af20416ee918590671b48f374c40f1d9 100644 --- a/source/Lib/CommonLib/dtrace_blockstatistics.h +++ b/source/Lib/CommonLib/dtrace_blockstatistics.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,23 +63,40 @@ enum class BlockStatistic { ChromaQPAdj, QP, SplitSeries, - TransQuantBypassFlag, - MTSIdx, + MTSIdx_Y, + MTSIdx_Cb, + MTSIdx_Cr, + BDPCM, + BDPCMChroma, + TileIdx, + IndependentSliceIdx, + LFNSTIdx, + JointCbCr, + CompAlphaCb, + CompAlphaCr, + RDPCM_Y, + RDPCM_Cb, + RDPCM_Cr, // intra - IPCM, Luma_IntraMode, Chroma_IntraMode, MultiRefIdx, + MIPFlag, + ISPMode, + // inter SkipFlag, RootCbf, + SbtIdx, + SbtPos, Cbf_Y, Cbf_Cb, Cbf_Cr, IMVMode, InterDir, MergeFlag, + RegularMergeFlag, MergeIdx, MergeType, MVPIdxL0, @@ -99,13 +116,12 @@ enum class BlockStatistic { MMVDSkipFlag, MMVDMergeFlag, MMVDMergeIdx, - MHIntraFlag, - TriangleFlag, + CiipFlag, + SMVDFlag, TrianglePartitioning, TriangleMVL0, //<< currently only uni-prediction enabled TriangleMVL1, //<< currently only uni-prediction enabled - GBIIndex, - IBCFlag, + BCWIndex, // for dual tree // general Depth_Chroma, @@ -115,10 +131,8 @@ enum class BlockStatistic { ChromaQPAdj_Chroma, QP_Chroma, SplitSeries_Chroma, - TransQuantBypassFlag_Chroma, // intra - IPCM_Chroma, NumBlockStatistics, }; @@ -138,15 +152,31 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { // Statistics enum Statistics name string Statistic Type Type specific information: // Value range, vector scale - { BlockStatistic::PredMode, std::tuple<std::string, BlockStatisticType, std::string>{"PredMode", BlockStatisticType::Flag, ""}}, + { BlockStatistic::PredMode, std::tuple<std::string, BlockStatisticType, 
std::string>{"PredMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_PREDICTION_MODES) + "]"}}, { BlockStatistic::MergeFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MergeFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::RegularMergeFlag, std::tuple<std::string, BlockStatisticType, std::string>{"RegularMergeFlag", BlockStatisticType::Flag, ""}}, { BlockStatistic::MVL0, std::tuple<std::string, BlockStatisticType, std::string>{"MVL0", BlockStatisticType::Vector, "Scale: 4"}}, { BlockStatistic::MVL1, std::tuple<std::string, BlockStatisticType, std::string>{"MVL1", BlockStatisticType::Vector, "Scale: 4"}}, - { BlockStatistic::IPCM, std::tuple<std::string, BlockStatisticType, std::string>{"IPCM", BlockStatisticType::Flag, ""}}, { BlockStatistic::Luma_IntraMode, std::tuple<std::string, BlockStatisticType, std::string>{"Luma_IntraMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}}, { BlockStatistic::Chroma_IntraMode, std::tuple<std::string, BlockStatisticType, std::string>{"Chroma_IntraMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}}, { BlockStatistic::SkipFlag, std::tuple<std::string, BlockStatisticType, std::string>{"SkipFlag", BlockStatisticType::Flag, ""}}, - { BlockStatistic::MTSIdx, std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Y", BlockStatisticType::Integer, ""}}, + { BlockStatistic::MTSIdx_Y, std::tuple<std::string, BlockStatisticType, std::string> {"MTS_Y", BlockStatisticType::Integer, ""} }, + { BlockStatistic::MTSIdx_Cb, std::tuple<std::string, BlockStatisticType, std::string>{"MTS_Cb", BlockStatisticType::Integer, ""} }, + { BlockStatistic::MTSIdx_Cr, std::tuple<std::string, BlockStatisticType, std::string>{"MTS_Cr", BlockStatisticType::Integer, ""} }, + { BlockStatistic::BDPCM, std::tuple<std::string, BlockStatisticType, std::string>{"BDPCM", BlockStatisticType::Flag, ""}}, // called bdpcmMode, but used like a flag in 
the software? related to intra, but signalled always? + { BlockStatistic::BDPCMChroma, std::tuple<std::string, BlockStatisticType, std::string>{"BDPCMChroma", BlockStatisticType::Flag, ""}}, + { BlockStatistic::TileIdx, std::tuple<std::string, BlockStatisticType, std::string>{"TileIdx", BlockStatisticType::Integer, ""}}, + { BlockStatistic::IndependentSliceIdx, std::tuple<std::string, BlockStatisticType, std::string>{"IndependentSliceIdx", BlockStatisticType::Integer, ""}}, + { BlockStatistic::LFNSTIdx, std::tuple<std::string, BlockStatisticType, std::string>{"LFNSTIdx", BlockStatisticType::Integer, "[0, 3]"}}, + { BlockStatistic::JointCbCr, std::tuple<std::string, BlockStatisticType, std::string>{"JointCbCr", BlockStatisticType::Flag, ""}}, + { BlockStatistic::CompAlphaCb, std::tuple<std::string, BlockStatisticType, std::string>{"CompAlphaCb", BlockStatisticType::Integer, ""}}, + { BlockStatistic::CompAlphaCr, std::tuple<std::string, BlockStatisticType, std::string>{"CompAlphaCr", BlockStatisticType::Integer, ""}}, + { BlockStatistic::RDPCM_Y, std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Y", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}}, + { BlockStatistic::RDPCM_Cb, std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Cb", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}}, + { BlockStatistic::RDPCM_Cr, std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Cr", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}}, + + { BlockStatistic::MIPFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MIPFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::ISPMode, std::tuple<std::string, BlockStatisticType, std::string>{"ISPMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_SUBPARTITIONS_MODES) + "]"}}, { BlockStatistic::Depth, std::tuple<std::string, BlockStatisticType, std::string>{"Depth", 
BlockStatisticType::Integer, "[0, 7]"}}, { BlockStatistic::QT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, { BlockStatistic::BT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, @@ -155,10 +185,11 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { BlockStatistic::QP, std::tuple<std::string, BlockStatisticType, std::string>{"QP", BlockStatisticType::Integer, "[0, 51]"}}, { BlockStatistic::SplitSeries, std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries", BlockStatisticType::Integer, "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}}, { BlockStatistic::RootCbf, std::tuple<std::string, BlockStatisticType, std::string>{"RootCbf", BlockStatisticType::Flag, ""}}, + { BlockStatistic::SbtIdx, std::tuple<std::string, BlockStatisticType, std::string>{"SbtIdx", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_SBT_IDX) + "]"}}, + { BlockStatistic::SbtPos, std::tuple<std::string, BlockStatisticType, std::string>{"SbtPos", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_SBT_POS) + "]"}}, { BlockStatistic::Cbf_Y, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Y", BlockStatisticType::Flag, ""}}, { BlockStatistic::Cbf_Cb, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cb", BlockStatisticType::Flag, ""}}, { BlockStatistic::Cbf_Cr, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cr", BlockStatisticType::Flag, ""}}, - { BlockStatistic::TransQuantBypassFlag, std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag", BlockStatisticType::Flag, ""}}, { BlockStatistic::MergeIdx, std::tuple<std::string, BlockStatisticType, std::string>{"MergeIdx", BlockStatisticType::Integer, "[0, 7]"}}, { BlockStatistic::InterDir, std::tuple<std::string, BlockStatisticType, std::string>{"InterDir", 
BlockStatisticType::Integer, "[1, 3]"}}, { BlockStatistic::MergeType, std::tuple<std::string, BlockStatisticType, std::string>{"MergeType", BlockStatisticType::Integer, "[0, 2]"}}, @@ -179,13 +210,12 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { BlockStatistic::MMVDSkipFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MMVDSkipFlag", BlockStatisticType::Flag, ""}}, { BlockStatistic::MMVDMergeFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MMVDMergeFlag", BlockStatisticType::Flag, ""}}, { BlockStatistic::MMVDMergeIdx, std::tuple<std::string, BlockStatisticType, std::string>{"MMVDMergeIdx", BlockStatisticType::Integer, "[0, 1]"}}, - { BlockStatistic::MHIntraFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MHIntraFlag", BlockStatisticType::Flag, ""}}, - { BlockStatistic::TriangleFlag, std::tuple<std::string, BlockStatisticType, std::string>{"TriangleFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::CiipFlag, std::tuple<std::string, BlockStatisticType, std::string>{"CiipFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::SMVDFlag, std::tuple<std::string, BlockStatisticType, std::string>{"SMVDFlag", BlockStatisticType::Flag, ""}}, { BlockStatistic::TrianglePartitioning, std::tuple<std::string, BlockStatisticType, std::string>{"TrianglePartitioning", BlockStatisticType::Line, ""}}, { BlockStatistic::TriangleMVL0, std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL0", BlockStatisticType::VectorPolygon, "Scale: 4"}}, { BlockStatistic::TriangleMVL1, std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL1", BlockStatisticType::VectorPolygon, "Scale: 4"}}, - { BlockStatistic::GBIIndex, std::tuple<std::string, BlockStatisticType, std::string>{"GBIIndex", BlockStatisticType::Integer, "[0, 4]"}}, - { BlockStatistic::IBCFlag, std::tuple<std::string, BlockStatisticType, std::string>{"IBCFlag", BlockStatisticType::Flag, ""}}, + { 
BlockStatistic::BCWIndex, std::tuple<std::string, BlockStatisticType, std::string>{"BCWIndex", BlockStatisticType::Integer, "[0, 4]"}}, // for dual tree { BlockStatistic::Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? { BlockStatistic::QT_Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? @@ -194,8 +224,6 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType { BlockStatistic::ChromaQPAdj_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj_Chroma", BlockStatisticType::Integer, "[-10, 10]"}}, // todo: actual limits? { BlockStatistic::QP_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"QP_Chroma", BlockStatisticType::Integer, "[0, 51]"}}, { BlockStatistic::SplitSeries_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries_Chroma", BlockStatisticType::Integer, "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}}, - { BlockStatistic::TransQuantBypassFlag_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag_Chroma", BlockStatisticType::Flag, ""}}, - { BlockStatistic::IPCM_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"IPCM_Chroma", BlockStatisticType::Flag, ""}}, }; diff --git a/source/Lib/CommonLib/dtrace_buffer.h b/source/Lib/CommonLib/dtrace_buffer.h index f5fcbdf61a542990ed952845786472b1e40d6077..afba4a3ced5a5e4e8db2d57faba03a055c53db1d 100644 --- a/source/Lib/CommonLib/dtrace_buffer.h +++ b/source/Lib/CommonLib/dtrace_buffer.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/dtrace_codingstruct.h b/source/Lib/CommonLib/dtrace_codingstruct.h index eafc916e1b9cbfbb4fb646a057cbd8e59f1bc436..656942903eb14621fe0db10cc3a721a465affb0c 100644 --- a/source/Lib/CommonLib/dtrace_codingstruct.h +++ b/source/Lib/CommonLib/dtrace_codingstruct.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/dtrace_next.h b/source/Lib/CommonLib/dtrace_next.h index 95681319b4d76c2b1a02236fd4dd41ec1b905d4b..7ef78cca960ba7d31b793788d4d0510bee803e52 100644 --- a/source/Lib/CommonLib/dtrace_next.h +++ b/source/Lib/CommonLib/dtrace_next.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/version.h b/source/Lib/CommonLib/version.h index 39d859b2b00201c0e31e0bc5b97c7fe3e0c7acc0..3ef053dca0c41d0a1edba42c9f4c2a67b976c164 100644 --- a/source/Lib/CommonLib/version.h +++ b/source/Lib/CommonLib/version.h @@ -1,3 +1,3 @@ #if ! defined( VTM_VERSION ) -#define VTM_VERSION "4.1" +#define VTM_VERSION "7.3" #endif diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h index ef368d78f93a25eeccfa57e0481415fda4b588f8..30858585761678f3c87cdbb9c4bcc51b3e572a7b 100644 --- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h +++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h @@ -31,881 +31,653 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** \file AdaptiveLoopFilterX86.h - \brief adaptive loop filter class -*/ #include "CommonDefX86.h" #include "../AdaptiveLoopFilter.h" -//! \ingroup CommonLib -//! \{ - #ifdef TARGET_SIMD_X86 #if defined _MSC_VER #include <tmmintrin.h> #else -#include <immintrin.h> +#include <x86intrin.h> #endif template<X86_VEXT vext> -static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift ) +static void simdDeriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS], + const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const int shift, + const int vbCTUHeight, int vbPos ) { - const int img_stride = srcLuma.stride; - const Pel* srcExt = srcLuma.buf; + CHECK((blk.height & 7) != 0, "Block height must be a multiple of 8"); + CHECK((blk.width & 7) != 0, "Block width must be a multiple of 8"); + CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2"); - const int fl = 2; - const int flplusOne = fl + 1; - const int fl2plusTwo = 2 * fl + 2; - const int var_max = 15; + const size_t imgStride = srcLuma.stride; + const Pel * srcExt = srcLuma.buf; - const int imgHExtended = blk.height + fl2plusTwo; - const int imgWExtended = blk.width + fl2plusTwo; + const int imgHExtended = blk.height + 4; + const int imgWExtended = blk.width + 4; const int posX = blk.pos().x; const int posY = blk.pos().y; - const int start_height1 = posY - flplusOne; - static uint16_t _temp[( AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4 ) >> 1][AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4]; + // 18x40 array + uint16_t colSums[(AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4) >> 1] + [AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 8]; - for( int i = 0; i < imgHExtended - 2; i += 2 ) + for (int i = 0; i < imgHExtended; i += 2) { - int yoffset = ( i + 1 + start_height1 ) * img_stride - flplusOne; - - const Pel *p_imgY_pad_down = &srcExt[yoffset - 
img_stride]; - const Pel *p_imgY_pad = &srcExt[yoffset]; - const Pel *p_imgY_pad_up = &srcExt[yoffset + img_stride]; - const Pel *p_imgY_pad_up2 = &srcExt[yoffset + img_stride * 2]; + const size_t offset = (i + posY - 3) * imgStride + posX - 3; - __m128i mmStore = _mm_setzero_si128(); + const Pel *imgY0 = &srcExt[offset]; + const Pel *imgY1 = &srcExt[offset + imgStride]; + const Pel *imgY2 = &srcExt[offset + imgStride * 2]; + const Pel *imgY3 = &srcExt[offset + imgStride * 3]; - for( int j = 2; j < imgWExtended; j += 8 ) + // pixel padding for gradient calculation + int pos = blkDst.pos().y - 2 + i; + int posInCTU = pos & (vbCTUHeight - 1); + if (pos > 0 && posInCTU == vbPos - 2) { - const int pixY = j - 1 + posX; - - const __m128i* pY = ( __m128i* )( p_imgY_pad + pixY - 1 ); - const __m128i* pYdown = ( __m128i* )( p_imgY_pad_down + pixY - 1 ); - const __m128i* pYup = ( __m128i* )( p_imgY_pad_up + pixY - 1 ); - const __m128i* pYup2 = ( __m128i* )( p_imgY_pad_up2 + pixY - 1 ); - - const __m128i* pY_next = ( __m128i* )( p_imgY_pad + pixY + 7 ); - const __m128i* pYdown_next = ( __m128i* )( p_imgY_pad_down + pixY + 7 ); - const __m128i* pYup_next = ( __m128i* )( p_imgY_pad_up + pixY + 7 ); - const __m128i* pYup2_next = ( __m128i* )( p_imgY_pad_up2 + pixY + 7 ); - - __m128i xmm0 = _mm_loadu_si128( pYdown ); - __m128i xmm1 = _mm_loadu_si128( pY ); - __m128i xmm2 = _mm_loadu_si128( pYup ); - __m128i xmm3 = _mm_loadu_si128( pYup2 ); - - const __m128i xmm0_next = _mm_loadu_si128( pYdown_next ); - const __m128i xmm1_next = _mm_loadu_si128( pY_next ); - const __m128i xmm2_next = _mm_loadu_si128( pYup_next ); - const __m128i xmm3_next = _mm_loadu_si128( pYup2_next ); - - __m128i xmm4 = _mm_slli_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), 1 ); - __m128i xmm5 = _mm_slli_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 2 ), 1 ); - - __m128i xmm15 = _mm_setzero_si128(); - - //dig0 - __m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 ); - xmm6 = _mm_sub_epi16( 
_mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm6, xmm15, 0xAA ) ); - __m128i xmm8 = _mm_add_epi16( _mm_alignr_epi8( xmm3_next, xmm3, 4 ), xmm1 ); - xmm8 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm8, xmm15, 0x55 ) ); - - //dig1 - __m128i xmm9 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 4 ), xmm2 ); - xmm9 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm9, xmm15, 0xAA ) ); - __m128i xmm10 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm3 ); - xmm10 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm10, xmm15, 0x55 ) ); - - //hor - __m128i xmm13 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm1 ); - xmm13 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm13, xmm15, 0xAA ) ); - __m128i xmm14 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm2 ); - xmm14 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm14, xmm15, 0x55 ) ); - - //ver - __m128i xmm11 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 2 ), _mm_alignr_epi8( xmm2_next, xmm2, 2 ) ); - xmm11 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm11, xmm15, 0xAA ) ); - __m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) ); - xmm12 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm12, xmm15, 0x55 ) ); - - xmm6 = _mm_abs_epi16( xmm6 ); - xmm8 = _mm_abs_epi16( xmm8 ); - xmm9 = _mm_abs_epi16( xmm9 ); - xmm10 = _mm_abs_epi16( xmm10 ); - xmm11 = _mm_abs_epi16( xmm11 ); - xmm12 = _mm_abs_epi16( xmm12 ); - xmm13 = _mm_abs_epi16( xmm13 ); - xmm14 = _mm_abs_epi16( xmm14 ); - - xmm6 = _mm_add_epi16( xmm6, xmm8 ); - xmm9 = _mm_add_epi16( xmm9, xmm10 ); - xmm11 = _mm_add_epi16( xmm11, xmm12 ); - xmm13 = _mm_add_epi16( xmm13, xmm14 ); - - xmm6 = _mm_add_epi16( xmm6, _mm_srli_si128( xmm6, 2 ) ); - xmm9 = _mm_add_epi16( xmm9, 
_mm_slli_si128( xmm9, 2 ) ); - xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm11, 2 ) ); - xmm13 = _mm_add_epi16( xmm13, _mm_slli_si128( xmm13, 2 ) ); - - xmm6 = _mm_blend_epi16( xmm6, xmm9, 0xAA ); - xmm11 = _mm_blend_epi16( xmm11, xmm13, 0xAA ); - - xmm6 = _mm_add_epi16( xmm6, _mm_slli_si128( xmm6, 4 ) ); - xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm11, 4 ) ); - - xmm6 = _mm_blend_epi16( xmm11, xmm6, 0xCC ); - - xmm9 = _mm_srli_si128( xmm6, 8 ); - - if( j > 2 ) - { - _mm_storel_epi64( ( __m128i* )( &( _temp[i >> 1][j - 2 - 4] ) ), _mm_add_epi16( xmm6, mmStore ) ); - } + imgY3 = imgY2; + } + else if (pos > 0 && posInCTU == vbPos) + { + imgY0 = imgY1; + } - xmm6 = _mm_add_epi16( xmm6, xmm9 ); //V H D0 D1 - _mm_storel_epi64( ( __m128i* )( &( _temp[i >> 1][j - 2] ) ), xmm6 ); + __m128i prev = _mm_setzero_si128(); - mmStore = xmm9; + for (int j = 0; j < imgWExtended; j += 8) + { + const __m128i x0 = _mm_loadu_si128((const __m128i *) (imgY0 + j)); + const __m128i x1 = _mm_loadu_si128((const __m128i *) (imgY1 + j)); + const __m128i x2 = _mm_loadu_si128((const __m128i *) (imgY2 + j)); + const __m128i x3 = _mm_loadu_si128((const __m128i *) (imgY3 + j)); + + const __m128i x4 = _mm_loadu_si128((const __m128i *) (imgY0 + j + 2)); + const __m128i x5 = _mm_loadu_si128((const __m128i *) (imgY1 + j + 2)); + const __m128i x6 = _mm_loadu_si128((const __m128i *) (imgY2 + j + 2)); + const __m128i x7 = _mm_loadu_si128((const __m128i *) (imgY3 + j + 2)); + + const __m128i nw = _mm_blend_epi16(x0, x1, 0xaa); + const __m128i n = _mm_blend_epi16(x0, x5, 0x55); + const __m128i ne = _mm_blend_epi16(x4, x5, 0xaa); + const __m128i w = _mm_blend_epi16(x1, x2, 0xaa); + const __m128i e = _mm_blend_epi16(x5, x6, 0xaa); + const __m128i sw = _mm_blend_epi16(x2, x3, 0xaa); + const __m128i s = _mm_blend_epi16(x2, x7, 0x55); + const __m128i se = _mm_blend_epi16(x6, x7, 0xaa); + + __m128i c = _mm_blend_epi16(x1, x6, 0x55); + c = _mm_add_epi16(c, c); + __m128i d = _mm_shuffle_epi8(c, 
_mm_setr_epi8(2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13)); + + const __m128i ver = _mm_abs_epi16(_mm_sub_epi16(c, _mm_add_epi16(n, s))); + const __m128i hor = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(w, e))); + const __m128i di0 = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(nw, se))); + const __m128i di1 = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(ne, sw))); + + const __m128i hv = _mm_hadd_epi16(ver, hor); + const __m128i di = _mm_hadd_epi16(di0, di1); + const __m128i all = _mm_hadd_epi16(hv, di); + + const __m128i t = _mm_blend_epi16(all, prev, 0xaa); + _mm_storeu_si128((__m128i *) &colSums[i >> 1][j], _mm_hadd_epi16(t, all)); + prev = all; } } - //const int offset = 8 << NO_VALS_LAGR_SHIFT; - - const __m128i mm_0 = _mm_setzero_si128(); - const __m128i mm_15 = _mm_set1_epi64x( 0x000000000000000F ); - const __m128i mm_th = _mm_set1_epi64x( 0x4333333332222210 ); - - const __m128i xmm14 = _mm_set1_epi32( 1 ); //offset - const __m128i xmm13 = _mm_set1_epi32( var_max ); - - for( int i = 0; i < ( blk.height >> 1 ); i += 2 ) + for (int i = 0; i < (blk.height >> 1); i += 4) { - for( int j = 0; j < blk.width; j += 8 ) + for (int j = 0; j < blk.width; j += 8) { - __m128i xmm0 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 0][j] ) ) ); - __m128i xmm1 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 1][j] ) ) ); - __m128i xmm2 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 2][j] ) ) ); - __m128i xmm3 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 3][j] ) ) ); - - __m128i xmm4 = _mm_add_epi16( xmm0, xmm1 ); - __m128i xmm6 = _mm_add_epi16( xmm2, xmm3 ); - - xmm0 = _mm_unpackhi_epi16( xmm4, mm_0 ); - xmm2 = _mm_unpackhi_epi16( xmm6, mm_0 ); - xmm0 = _mm_add_epi32( xmm0, xmm2 ); - - xmm4 = _mm_unpacklo_epi16( xmm4, mm_0 ); - xmm6 = _mm_unpacklo_epi16( xmm6, mm_0 ); - xmm4 = _mm_add_epi32( xmm4, xmm6 ); - - __m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 ); - __m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 ); - xmm12 = _mm_add_epi32( xmm10, 
xmm12 ); - xmm12 = _mm_srai_epi32( xmm12, shift - 6 ); - xmm12 = _mm_min_epi32( xmm12, xmm13 ); - - xmm12 = _mm_and_si128( xmm12, mm_15 ); - xmm12 = _mm_slli_epi32( xmm12, 2 ); - __m128i xmm11 = _mm_shuffle_epi32( xmm12, 0x0E ); //extracted from second half coz no different shifts are available - xmm12 = _mm_srl_epi64( mm_th, xmm12 ); - xmm11 = _mm_srl_epi64( mm_th, xmm11 ); - xmm12 = _mm_blend_epi16( xmm12, xmm11, 0xF0 ); - xmm12 = _mm_and_si128( xmm12, mm_15 ); // avg_var in lower 4 bits of both halves - - xmm6 = _mm_shuffle_epi32( xmm4, 0xB1 ); - xmm2 = _mm_shuffle_epi32( xmm0, 0xB1 ); - - __m128i xmm7 = _mm_set_epi32( 0, 2, 1, 3 ); - __m128i xmm9 = _mm_shuffle_epi32( xmm7, 0xB1 ); - - __m128i xmm5 = _mm_cmplt_epi32( xmm6, xmm4 ); - __m128i xmm8 = _mm_cmplt_epi32( xmm2, xmm0 ); //2 masks coz 4 integers for every parts are compared - - xmm5 = _mm_shuffle_epi32( xmm5, 0xA0 ); - xmm8 = _mm_shuffle_epi32( xmm8, 0xA0 ); - - xmm4 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm4 ), _mm_and_si128( xmm5, xmm6 ) ); //HV + D - xmm0 = _mm_or_si128( _mm_andnot_si128( xmm8, xmm0 ), _mm_and_si128( xmm8, xmm2 ) ); //HV + D <--second part - - xmm10 = _mm_or_si128( _mm_andnot_si128( xmm8, xmm7 ), _mm_and_si128( xmm8, xmm9 ) ); //dirTemp <-- second part - xmm7 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm7 ), _mm_and_si128( xmm5, xmm9 ) ); //dirTemp - - xmm3 = _mm_shuffle_epi32( xmm0, 0x1B ); // need higher part from this - xmm6 = _mm_shuffle_epi32( xmm4, 0x1B ); - xmm8 = _mm_blend_epi16( xmm4, xmm3, 0xF0 ); // 0 or 3 - xmm6 = _mm_blend_epi16( xmm6, xmm0, 0xF0 ); - - xmm6 = _mm_mullo_epi32( xmm8, xmm6 ); - xmm9 = _mm_shuffle_epi32( xmm6, 0xB1 ); - xmm5 = _mm_cmpgt_epi32( xmm6, xmm9 ); - xmm5 = _mm_shuffle_epi32( xmm5, 0xF0 ); //second mask is for all upper part - - xmm8 = _mm_shuffle_epi32( xmm4, 0x0E ); - xmm8 = _mm_blend_epi16( xmm8, xmm0, 0xF0 ); // (DL, DH in upepr part) - xmm4 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 ); //(HVL, HVH) in upper part - - xmm7 = 
_mm_shuffle_epi32( xmm7, 0x08 ); // 2 -> 1 - xmm7 = _mm_blend_epi16( xmm7, _mm_shuffle_epi32( xmm10, 0x80 ), 0xF0 ); - xmm1 = _mm_shuffle_epi32( xmm7, 0xB1 ); // 1 -> 0, 0 -> 1 - - xmm4 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm4 ), _mm_and_si128( xmm5, xmm8 ) ); //HV_D - xmm7 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm7 ), _mm_and_si128( xmm5, xmm1 ) ); //main - secondary (upper halves are for second value) - - //xmm7 not to mix - - xmm0 = _mm_shuffle_epi32( xmm4, 0xFA ); - xmm4 = _mm_shuffle_epi32( xmm4, 0x50 ); //low, low, high, high - xmm6 = _mm_set_epi32( 2, 1, 9, 2 ); - - xmm2 = _mm_mullo_epi32( xmm0, xmm6 ); - xmm6 = _mm_mullo_epi32( xmm4, xmm6 ); - xmm4 = _mm_shuffle_epi32( xmm6, 0x4E ); - xmm0 = _mm_shuffle_epi32( xmm2, 0x4E ); //p to xmm6 - xmm6 = _mm_blend_epi16( xmm6, xmm0, 0xF0 ); - xmm4 = _mm_blend_epi16( xmm4, xmm2, 0xF0 ); - - xmm5 = _mm_cmpgt_epi32( xmm4, xmm6 ); - xmm4 = _mm_and_si128( xmm5, xmm14 ); // 1 + 1 - - xmm8 = _mm_and_si128( xmm7, xmm14 ); - xmm8 = _mm_slli_epi32( xmm8, 1 ); - - xmm5 = _mm_add_epi32( xmm4, _mm_shuffle_epi32( xmm4, 0xB1 ) ); //directionStrength - xmm4 = _mm_cmpgt_epi32( xmm5, mm_0 ); //is a mask now - xmm4 = _mm_and_si128( _mm_add_epi32( xmm8, xmm5 ), xmm4 ); - - xmm4 = _mm_add_epi32( xmm4, _mm_slli_epi32( xmm4, 2 ) ); //x5 - xmm4 = _mm_add_epi32( xmm4, xmm12 ); //+= - - xmm9 = _mm_shuffle_epi32( xmm7, 0xB1 );// <-- - xmm7 = _mm_slli_epi32( xmm7, 1 ); - xmm9 = _mm_srai_epi32( xmm9, 1 ); - xmm7 = _mm_add_epi32( xmm7, xmm9 ); - - //to write to struct - const int t0 = _mm_extract_epi32( xmm7, 0 ); - const int t1 = _mm_extract_epi32( xmm7, 2 ); - const int c0 = _mm_extract_epi32( xmm4, 0 ); - const int c1 = _mm_extract_epi32( xmm4, 2 ); - - const int transposeTable[8] = { 0, 1, 0, 2, 2, 3, 1, 3 }; - int transposeIdx0 = transposeTable[t0]; - int transposeIdx1 = transposeTable[t1]; - int classIdx0 = c0; - int classIdx1 = c1; - - const int yOffset = ( i << 1 ) + posY; - const int xOffset = j + posX; - - AlfClassifier *cl0 = 
classifier[yOffset] + xOffset; - AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset; - AlfClassifier *cl2 = classifier[yOffset + 2] + xOffset; - AlfClassifier *cl3 = classifier[yOffset + 3] + xOffset; - - AlfClassifier *_cl0 = cl0 + 4; - AlfClassifier *_cl1 = cl1 + 4; - AlfClassifier *_cl2 = cl2 + 4; - AlfClassifier *_cl3 = cl3 + 4; - - cl0[0] = cl0[1] = cl0[2] = cl0[3] = cl1[0] = cl1[1] = cl1[2] = cl1[3] = cl2[0] = cl2[1] = cl2[2] = cl2[3] = cl3[0] = cl3[1] = cl3[2] = cl3[3] = AlfClassifier( classIdx0, transposeIdx0 ); - _cl0[0] = _cl0[1] = _cl0[2] = _cl0[3] = _cl1[0] = _cl1[1] = _cl1[2] = _cl1[3] = _cl2[0] = _cl2[1] = _cl2[2] = _cl2[3] = _cl3[0] = _cl3[1] = _cl3[2] = _cl3[3] = AlfClassifier( classIdx1, transposeIdx1 ); + __m128i x0, x1, x2, x3, x4, x5, x6, x7; + + const uint32_t z = (2 * i + blkDst.pos().y) & (vbCTUHeight - 1); + const uint32_t z2 = (2 * i + 4 + blkDst.pos().y) & (vbCTUHeight - 1); + + x0 = (z == vbPos) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 0][j + 4]); + x1 = _mm_loadu_si128((__m128i *) &colSums[i + 1][j + 4]); + x2 = _mm_loadu_si128((__m128i *) &colSums[i + 2][j + 4]); + x3 = (z == vbPos - 4) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 3][j + 4]); + + x4 = (z2 == vbPos) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 2][j + 4]); + x5 = _mm_loadu_si128((__m128i *) &colSums[i + 3][j + 4]); + x6 = _mm_loadu_si128((__m128i *) &colSums[i + 4][j + 4]); + x7 = (z2 == vbPos - 4) ? 
_mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 5][j + 4]); + + __m128i x0l = _mm_cvtepu16_epi32(x0); + __m128i x0h = _mm_unpackhi_epi16(x0, _mm_setzero_si128()); + __m128i x1l = _mm_cvtepu16_epi32(x1); + __m128i x1h = _mm_unpackhi_epi16(x1, _mm_setzero_si128()); + __m128i x2l = _mm_cvtepu16_epi32(x2); + __m128i x2h = _mm_unpackhi_epi16(x2, _mm_setzero_si128()); + __m128i x3l = _mm_cvtepu16_epi32(x3); + __m128i x3h = _mm_unpackhi_epi16(x3, _mm_setzero_si128()); + __m128i x4l = _mm_cvtepu16_epi32(x4); + __m128i x4h = _mm_unpackhi_epi16(x4, _mm_setzero_si128()); + __m128i x5l = _mm_cvtepu16_epi32(x5); + __m128i x5h = _mm_unpackhi_epi16(x5, _mm_setzero_si128()); + __m128i x6l = _mm_cvtepu16_epi32(x6); + __m128i x6h = _mm_unpackhi_epi16(x6, _mm_setzero_si128()); + __m128i x7l = _mm_cvtepu16_epi32(x7); + __m128i x7h = _mm_unpackhi_epi16(x7, _mm_setzero_si128()); + + x0l = _mm_add_epi32(x0l, x1l); + x2l = _mm_add_epi32(x2l, x3l); + x4l = _mm_add_epi32(x4l, x5l); + x6l = _mm_add_epi32(x6l, x7l); + x0h = _mm_add_epi32(x0h, x1h); + x2h = _mm_add_epi32(x2h, x3h); + x4h = _mm_add_epi32(x4h, x5h); + x6h = _mm_add_epi32(x6h, x7h); + + x0l = _mm_add_epi32(x0l, x2l); + x4l = _mm_add_epi32(x4l, x6l); + x0h = _mm_add_epi32(x0h, x2h); + x4h = _mm_add_epi32(x4h, x6h); + + x2l = _mm_unpacklo_epi32(x0l, x4l); + x2h = _mm_unpackhi_epi32(x0l, x4l); + x6l = _mm_unpacklo_epi32(x0h, x4h); + x6h = _mm_unpackhi_epi32(x0h, x4h); + + __m128i sumV = _mm_unpacklo_epi32(x2l, x6l); + __m128i sumH = _mm_unpackhi_epi32(x2l, x6l); + __m128i sumD0 = _mm_unpacklo_epi32(x2h, x6h); + __m128i sumD1 = _mm_unpackhi_epi32(x2h, x6h); + + // uint32_t tempAct = sumV + sumH; + __m128i tempAct = _mm_add_epi32(sumV, sumH); + + // const uint32_t activity = std::min<uint32_t>(15, tempAct * scale >> shift); + // static const uint8_t th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; + // uint8_t classIdx = th[activity]; + const uint32_t scale = (z == vbPos - 4 || z == vbPos) ? 
96 : 64; + const uint32_t scale2 = (z2 == vbPos - 4 || z2 == vbPos) ? 96 : 64; + __m128i activity = _mm_mullo_epi32(tempAct, _mm_unpacklo_epi64(_mm_set1_epi32(scale), _mm_set1_epi32(scale2))); + activity = _mm_srl_epi32(activity, _mm_cvtsi32_si128(shift)); + activity = _mm_min_epi32(activity, _mm_set1_epi32(15)); + __m128i classIdx = _mm_shuffle_epi8(_mm_setr_epi8(0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4), activity); + + // if (sumV > sumH) + // { + // hv1 = sumV; + // hv0 = sumH; + // dirTempHV = 0; + // } + // else + // { + // hv1 = sumH; + // hv0 = sumV; + // dirTempHV = 1; + // } + __m128i dirTempHVMinus1 = _mm_cmpgt_epi32(sumV, sumH); + __m128i hv1 = _mm_max_epi32(sumV, sumH); + __m128i hv0 = _mm_min_epi32(sumV, sumH); + + // if (sumD0 > sumD1) + // { + // d1 = sumD0; + // d0 = sumD1; + // dirTempD = 0; + // } + // else + // { + // d1 = sumD1; + // d0 = sumD0; + // dirTempD = 1; + // } + __m128i dirTempDMinus1 = _mm_cmpgt_epi32(sumD0, sumD1); + __m128i d1 = _mm_max_epi32(sumD0, sumD1); + __m128i d0 = _mm_min_epi32(sumD0, sumD1); + + // int dirIdx; + // if (d1 * hv0 > hv1 * d0) + // { + // hvd1 = d1; + // hvd0 = d0; + // dirIdx = 0; + // } + // else + // { + // hvd1 = hv1; + // hvd0 = hv0; + // dirIdx = 2; + // } + __m128i a = _mm_xor_si128(_mm_mullo_epi32(d1, hv0), _mm_set1_epi32(0x80000000)); + __m128i b = _mm_xor_si128(_mm_mullo_epi32(hv1, d0), _mm_set1_epi32(0x80000000)); + __m128i dirIdx = _mm_cmpgt_epi32(a, b); + __m128i hvd1 = _mm_blendv_epi8(hv1, d1, dirIdx); + __m128i hvd0 = _mm_blendv_epi8(hv0, d0, dirIdx); + + // if (hvd1 * 2 > 9 * hvd0) + // { + // classIdx += (dirIdx + 2) * 5; + // } + // else if (hvd1 > 2 * hvd0) + // { + // classIdx += (dirIdx + 1) * 5; + // } + __m128i strength1 = _mm_cmpgt_epi32(hvd1, _mm_add_epi32(hvd0, hvd0)); + __m128i strength2 = _mm_cmpgt_epi32(_mm_add_epi32(hvd1, hvd1), _mm_add_epi32(hvd0, _mm_slli_epi32(hvd0, 3))); + __m128i offset = _mm_and_si128(strength1, _mm_set1_epi32(5)); + classIdx = _mm_add_epi32(classIdx, 
offset); + classIdx = _mm_add_epi32(classIdx, _mm_and_si128(strength2, _mm_set1_epi32(5))); + offset = _mm_andnot_si128(dirIdx, offset); + offset = _mm_add_epi32(offset, offset); + classIdx = _mm_add_epi32(classIdx, offset); + + // uint8_t transposeIdx = 2 * dirTempD + dirTempHV; + __m128i transposeIdx = _mm_set1_epi32(3); + transposeIdx = _mm_add_epi32(transposeIdx, dirTempHVMinus1); + transposeIdx = _mm_add_epi32(transposeIdx, dirTempDMinus1); + transposeIdx = _mm_add_epi32(transposeIdx, dirTempDMinus1); + + int yOffset = 2 * i + blkDst.pos().y; + int xOffset = j + blkDst.pos().x; + + static_assert(sizeof(AlfClassifier) == 2, "ALFClassifier type must be 16 bits wide"); + __m128i v; + v = _mm_unpacklo_epi8(classIdx, transposeIdx); + v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9)); + _mm_storeu_si128((__m128i *) (classifier[yOffset] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 1] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 2] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 3] + xOffset), v); + v = _mm_unpackhi_epi8(classIdx, transposeIdx); + v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9)); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 4] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 5] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 6] + xOffset), v); + _mm_storeu_si128((__m128i *) (classifier[yOffset + 7] + xOffset), v); } } } template<X86_VEXT vext> -static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs ) -{ - static const unsigned char mask05[16] = { 8, 9, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - static const unsigned char mask03[16] = { 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - static 
const unsigned char mask_c[16] = { 0, 1, 8, 9, 4, 5, 14, 15, 2, 3, 10, 11, 12, 13, 6, 7 }; - - const bool bChroma = isChroma( compId ); - - const SPS* sps = cs.slice->getSPS(); - bool isDualTree = CS::isDualITree(cs); - bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag(); - ChromaFormat nChromaFormat = sps->getChromaFormatIdc(); +static void simdFilter5x5Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet, + const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight, + int vbPos) - const CPelBuf srcLuma = recSrc.get( compId ); - PelBuf dstLuma = recDst.get( compId ); - - const int srcStride = srcLuma.stride; - const int dstStride = dstLuma.stride; - - const Pel* srcExt = srcLuma.buf; - Pel* dst = dstLuma.buf; - - const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5; - - short *coef = filterSet; - const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5; - - const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1; - const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) ); - - const int startHeight = blk.y; - const int endHeight = blk.y + blk.height; - const int startWidth = blk.x; - const int endWidth = blk.x + blk.width; +{ + CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2"); + CHECK(!isChroma(compId), "ALF 5x5 filter is for chroma only"); - Pel* imgYRecPost = dst; - imgYRecPost += startHeight * dstStride; - int transposeIdx = 0; + const CPelBuf srcBuffer = recSrc.get(compId); + PelBuf dstBuffer = recDst.get(compId); - const int clsSizeY = 4; - const int clsSizeX = 4; + const size_t srcStride = srcBuffer.stride; + const size_t dstStride = dstBuffer.stride; - bool pcmFlags2x2[4] = {0,0,0,0}; - Pel pcmRec2x2[16]; + constexpr int SHIFT = AdaptiveLoopFilter::m_NUM_BITS - 1; + constexpr int ROUND = 1 << (SHIFT - 1); - CHECK( startHeight % 
clsSizeY, "Wrong startHeight in filtering" ); - CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" ); - CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" ); - CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" ); + const size_t width = blk.width; + const size_t height = blk.height; - const Pel* imgYRec = srcExt; + constexpr size_t STEP_X = 8; + constexpr size_t STEP_Y = 4; - Pel *pRec; - AlfClassifier *pClass = nullptr; + CHECK(blk.y % STEP_Y, "Wrong startHeight in filtering"); + CHECK(blk.x % STEP_X, "Wrong startWidth in filtering"); + CHECK(height % STEP_Y, "Wrong endHeight in filtering"); + CHECK(width % 4, "Wrong endWidth in filtering"); - int srcStride2 = srcStride * clsSizeY; + const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x; + Pel * dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x; - const __m128i mmOffset = _mm_set1_epi32( offset ); - const __m128i mmMin = _mm_set1_epi32( clpRng.min ); - const __m128i mmMax = _mm_set1_epi32( clpRng.max ); - const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask03 ); - const __m128i mm_mask05 = _mm_loadu_si128( ( __m128i* )mask05 ); - pImgYPad0 = imgYRec + startHeight * srcStride + startWidth; - pImgYPad1 = pImgYPad0 + srcStride; - pImgYPad2 = pImgYPad0 - srcStride; - pImgYPad3 = pImgYPad1 + srcStride; - pImgYPad4 = pImgYPad2 - srcStride; - pImgYPad5 = pImgYPad3 + srcStride; + const __m128i mmOffset = _mm_set1_epi32(ROUND); + const __m128i mmMin = _mm_set1_epi16( clpRng.min ); + const __m128i mmMax = _mm_set1_epi16( clpRng.max ); - pRec = imgYRecPost + startWidth; + __m128i params[2][3]; + __m128i fs = _mm_loadu_si128((__m128i *) filterSet); + params[0][0] = _mm_shuffle_epi32(fs, 0x00); + params[0][1] = _mm_shuffle_epi32(fs, 0x55); + params[0][2] = _mm_shuffle_epi32(fs, 0xaa); + __m128i fc = _mm_loadu_si128((__m128i *) fClipSet); + params[1][0] = _mm_shuffle_epi32(fc, 0x00); + params[1][1] = _mm_shuffle_epi32(fc, 0x55); + params[1][2] = 
_mm_shuffle_epi32(fc, 0xaa); - for( int i = 0; i < endHeight - startHeight; i += 4 ) + for (size_t i = 0; i < height; i += STEP_Y) { - pRec = imgYRecPost + startWidth + i * dstStride; - - if( !bChroma ) + for (size_t j = 0; j < width; j += STEP_X) { - pClass = classifier[startHeight + i] + startWidth; - } - for( int j = 0; j < endWidth - startWidth; j += 4 ) - { - if( !bChroma ) + for (size_t ii = 0; ii < STEP_Y; ii++) { - AlfClassifier& cl = pClass[j]; - transposeIdx = cl.transposeIdx; - if( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX ) + const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4; + + pImg0 = src + j + ii * srcStride; + pImg1 = pImg0 + srcStride; + pImg2 = pImg0 - srcStride; + pImg3 = pImg1 + srcStride; + pImg4 = pImg2 - srcStride; + + const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1); + if (yVb < vbPos && (yVb >= vbPos - 2)) // above { - pRec += 4; - continue; + pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1; + pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3; + + pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2; + pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4; } - coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF; - } - else if ( isPCMFilterDisabled ) - { - int blkX, blkY; - bool *flags = pcmFlags2x2; - Pel *pcmRec = pcmRec2x2; + else if (yVb >= vbPos && (yVb <= vbPos + 1)) // bottom + { + pImg2 = (yVb == vbPos) ? pImg0 : pImg2; + pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4; - // check which chroma 2x2 blocks use PCM - // chroma PCM may not be aligned with 4x4 ALF processing grid - for( blkY=0; blkY<4; blkY+=2 ) + pImg1 = (yVb == vbPos) ? pImg0 : pImg1; + pImg3 = (yVb <= vbPos + 1) ? 
pImg1 : pImg3; + } + __m128i cur = _mm_loadu_si128((const __m128i *) pImg0); + __m128i accumA = mmOffset; + __m128i accumB = mmOffset; + + auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) { + const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur); + const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur); + const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur); + const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur); + __m128i val01A = _mm_unpacklo_epi16(val00, val10); + __m128i val01B = _mm_unpackhi_epi16(val00, val10); + __m128i val01C = _mm_unpacklo_epi16(val01, val11); + __m128i val01D = _mm_unpackhi_epi16(val01, val11); + + __m128i limit01A = params[1][i]; + + val01A = _mm_min_epi16(val01A, limit01A); + val01B = _mm_min_epi16(val01B, limit01A); + val01C = _mm_min_epi16(val01C, limit01A); + val01D = _mm_min_epi16(val01D, limit01A); + + limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A); + + val01A = _mm_max_epi16(val01A, limit01A); + val01B = _mm_max_epi16(val01B, limit01A); + val01C = _mm_max_epi16(val01C, limit01A); + val01D = _mm_max_epi16(val01D, limit01A); + + val01A = _mm_add_epi16(val01A, val01C); + val01B = _mm_add_epi16(val01B, val01D); + + __m128i coeff01A = params[0][i]; + + accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A)); + accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01A)); + }; + + process2coeffs(0, pImg3 + 0, pImg4 + 0, pImg1 + 1, pImg2 - 1); + process2coeffs(1, pImg1 + 0, pImg2 + 0, pImg1 - 1, pImg2 + 1); + process2coeffs(2, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1); +#if JVET_Q0150 + bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1); + bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos); + if (!(isNearVBabove || isNearVBbelow)) { - for( blkX=0; blkX<4; blkX+=2 ) - { - Position pos(j+startWidth+blkX, i+startHeight+blkY); - CodingUnit* cu = 
isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L); - *flags++ = cu->ipcm ? 1 : 0; - - // save original samples from 2x2 PCM blocks - if( cu->ipcm ) - { - *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+0)]; - *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+1)]; - *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+0)]; - *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+1)]; - } - } + accumA = _mm_srai_epi32(accumA, SHIFT); + accumB = _mm_srai_epi32(accumB, SHIFT); } - - // skip entire 4x4 if all chroma 2x2 blocks use PCM - if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] ) + else { - pRec += 4; - continue; + accumA = _mm_srai_epi32(accumA, SHIFT + 3); + accumB = _mm_srai_epi32(accumB, SHIFT + 3); } - } - - __m128i c0, t0 = _mm_setzero_si128(); - - c0 = _mm_loadu_si128( ( __m128i* )( coef + 0 ) ); - c0 = _mm_alignr_epi8( c0, c0, 2 ); - c0 = _mm_blend_epi16( c0, t0, 0x40 ); - - if( transposeIdx & 1 ) - { - c0 = _mm_shuffle_epi8( c0, _mm_loadu_si128( ( __m128i* )mask_c ) ); - } - - if( transposeIdx == 0 || transposeIdx == 1 ) - { - c0 = _mm_shuffle_epi8( c0, xmm10 ); - } - - pImg0 = pImgYPad0 + j; - pImg1 = pImgYPad1 + j; - pImg2 = pImgYPad2 + j; - pImg3 = pImgYPad3 + j; - pImg4 = pImgYPad4 + j; - pImg5 = pImgYPad5 + j; - - for( int k = 0; k < 4; k++ ) - { - __m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 ) ); - __m128i xmm2 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 1 ) ); - __m128i xmm0 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 2 ) ); - __m128i xmm1 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 1 - 1 ) ); - __m128i xmm3 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 - 0 - 2 ) ); - - __m128i xmm7 = _mm_setzero_si128(); - - __m128i xmm6 = _mm_shuffle_epi8( xmm0, mm_mask05 ); - __m128i xmm8 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 2 ), mm_mask05 ); - __m128i xmm9 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 4 ), mm_mask05 ); - __m128i xmm11 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 6 ), mm_mask05 ); - - xmm6 = _mm_blend_epi16( 
xmm7, xmm6, 0x03 ); - xmm8 = _mm_blend_epi16( xmm7, xmm8, 0x03 ); - xmm9 = _mm_blend_epi16( xmm7, xmm9, 0x03 ); - xmm11 = _mm_blend_epi16( xmm7, xmm11, 0x03 ); - - xmm6 = _mm_add_epi16( xmm6, xmm0 ); - xmm8 = _mm_add_epi16( xmm8, _mm_srli_si128( xmm0, 2 ) ); - xmm9 = _mm_add_epi16( xmm9, _mm_srli_si128( xmm0, 4 ) ); - xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm0, 6 ) ); - - xmm6 = _mm_slli_si128( xmm6, 6 ); - xmm8 = _mm_slli_si128( xmm8, 6 ); - xmm9 = _mm_slli_si128( xmm9, 6 ); - xmm11 = _mm_slli_si128( xmm11, 6 ); - - xmm4 = _mm_add_epi16( xmm4, _mm_srli_si128( xmm3, 4 ) ); - xmm6 = _mm_blend_epi16( xmm6, _mm_slli_si128( xmm4, 14 ), 0x80 ); - xmm8 = _mm_blend_epi16( xmm8, _mm_slli_si128( xmm4, 12 ), 0x80 ); - xmm9 = _mm_blend_epi16( xmm9, _mm_slli_si128( xmm4, 10 ), 0x80 ); - xmm11 = _mm_blend_epi16( xmm11, _mm_slli_si128( xmm4, 8 ), 0x80 ); - - __m128i xmm12 = _mm_shuffle_epi8( xmm2, xmm10 ); - __m128i xmm13 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 2 ), xmm10 ); - __m128i xmm14 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 4 ), xmm10 ); - __m128i xmm15 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 6 ), xmm10 ); - - xmm12 = _mm_add_epi16( xmm12, _mm_srli_si128( xmm1, 2 ) ); - xmm13 = _mm_add_epi16( xmm13, _mm_srli_si128( xmm1, 4 ) ); - xmm14 = _mm_add_epi16( xmm14, _mm_srli_si128( xmm1, 6 ) ); - xmm15 = _mm_add_epi16( xmm15, _mm_srli_si128( xmm1, 8 ) ); - - xmm6 = _mm_blend_epi16( xmm6, xmm12, 0x07 ); - xmm8 = _mm_blend_epi16( xmm8, xmm13, 0x07 ); - xmm9 = _mm_blend_epi16( xmm9, xmm14, 0x07 ); - xmm11 = _mm_blend_epi16( xmm11, xmm15, 0x07 ); - - xmm6 = _mm_madd_epi16( xmm6, c0 ); - xmm8 = _mm_madd_epi16( xmm8, c0 ); - xmm9 = _mm_madd_epi16( xmm9, c0 ); - xmm11 = _mm_madd_epi16( xmm11, c0 ); - - xmm12 = _mm_shuffle_epi32( xmm6, 0x1B ); - xmm13 = _mm_shuffle_epi32( xmm8, 0x1B ); - xmm14 = _mm_shuffle_epi32( xmm9, 0x1B ); - xmm15 = _mm_shuffle_epi32( xmm11, 0x1B ); - - xmm6 = _mm_add_epi32( xmm6, xmm12 ); - xmm8 = _mm_add_epi32( xmm8, xmm13 ); - xmm9 = _mm_add_epi32( 
xmm9, xmm14 ); - xmm11 = _mm_add_epi32( xmm11, xmm15 ); - - xmm6 = _mm_blend_epi16( xmm6, xmm8, 0xF0 ); - xmm9 = _mm_blend_epi16( xmm9, xmm11, 0xF0 ); - - xmm12 = _mm_hadd_epi32( xmm6, xmm9 ); - - xmm12 = _mm_add_epi32( xmm12, mmOffset ); - xmm12 = _mm_srai_epi32( xmm12, numBitsMinus1 ); - - xmm12 = _mm_min_epi32( mmMax, _mm_max_epi32( xmm12, mmMin ) ); - - xmm12 = _mm_packus_epi32( xmm12, xmm12 ); - - _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 ); - - pRec += dstStride; - - pImg0 += srcStride; - pImg1 += srcStride; - pImg2 += srcStride; - pImg3 += srcStride; - pImg4 += srcStride; - pImg5 += srcStride; - - } //<-- end of k-loop - - pRec -= ( 4 * dstStride ); +#else + accumA = _mm_srai_epi32(accumA, SHIFT); + accumB = _mm_srai_epi32(accumB, SHIFT); +#endif + accumA = _mm_packs_epi32(accumA, accumB); + accumA = _mm_add_epi16(accumA, cur); + accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin)); - // restore 2x2 PCM chroma blocks - if( bChroma && isPCMFilterDisabled ) - { - int blkX, blkY; - bool *flags = pcmFlags2x2; - Pel *pcmRec = pcmRec2x2; - for( blkY=0; blkY<4; blkY+=2 ) + if (j + STEP_X <= width) + { + _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accumA); + } + else { - for( blkX=0; blkX<4; blkX+=2 ) - { - if( *flags++ ) - { - pRec[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++; - pRec[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++; - pRec[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++; - pRec[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++; - } - } + _mm_storel_epi64((__m128i *) (dst + ii * dstStride + j), accumA); } } - pRec += 4; } - pRec += 4 * dstStride; - - pImgYPad0 += srcStride2; - pImgYPad1 += srcStride2; - pImgYPad2 += srcStride2; - pImgYPad3 += srcStride2; - pImgYPad4 += srcStride2; - pImgYPad5 += srcStride2; + src += srcStride * STEP_Y; + dst += dstStride * STEP_Y; } } -template<X86_VEXT vext> -static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, 
short* filterSet, const ClpRng& clpRng, CodingStructure& cs ) +constexpr uint16_t sh(int x) { - static const unsigned char mask0[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 7, 4, 5, 2, 3 }; - static const unsigned char mask00[16] = { 2, 3, 0, 1, 0, 0, 0, 0, 8, 9, 0, 0, 0, 0, 0, 1 }; - static const unsigned char mask02[16] = { 0, 0, 0, 0, 2, 3, 10, 11, 0, 0, 10, 11, 2, 3, 0, 0 }; - static const unsigned char mask20[16] = { 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0 }; - static const unsigned char mask22[16] = { 14, 15, 0, 0, 6, 7, 4, 5, 12, 13, 0, 0, 8, 9, 0, 1 }; - static const unsigned char mask35[16] = { 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7 }; - - const bool bChroma = isChroma( compId ); + return 0x0202 * (x & 7) + 0x0100 + 0x1010 * (x & 8); +} - if( bChroma ) +static const uint16_t shuffleTab[4][2][8] = { { - CHECK( 0, "Chroma doesn't support 7x7" ); - } - const SPS* sps = cs.slice->getSPS(); - bool isDualTree = CS::isDualITree(cs); - bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag(); - ChromaFormat nChromaFormat = sps->getChromaFormatIdc(); - const CPelBuf srcLuma = recSrc.get( compId ); - PelBuf dstLuma = recDst.get( compId ); - - const int srcStride = srcLuma.stride; - const int dstStride = dstLuma.stride; - - const Pel* srcExt = srcLuma.buf; - Pel* dst = dstLuma.buf; - - const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5, *pImgYPad6; - - short *coef = filterSet; - const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4; - const Pel *pImg5, *pImg6; - - const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1; - const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) ); - - const int startHeight = blk.y; - const int endHeight = blk.y + blk.height; - const int startWidth = blk.x; - const int endWidth = blk.x + blk.width; - - Pel* imgYRecPost = dst; - imgYRecPost += startHeight * dstStride; + { sh(0), sh(1), sh(2), sh(3), sh(4), sh(5), sh(6), sh(7) }, + { sh(8), sh(9), sh(10), sh(11), sh(12), sh(13), 
sh(14), sh(15) }, + }, + { + { sh(9), sh(4), sh(10), sh(8), sh(1), sh(5), sh(11), sh(7) }, + { sh(3), sh(0), sh(2), sh(6), sh(12), sh(13), sh(14), sh(15) }, + }, + { + { sh(0), sh(3), sh(2), sh(1), sh(8), sh(7), sh(6), sh(5) }, + { sh(4), sh(9), sh(10), sh(11), sh(12), sh(13), sh(14), sh(15) }, + }, + { + { sh(9), sh(8), sh(10), sh(4), sh(3), sh(7), sh(11), sh(5) }, + { sh(1), sh(0), sh(2), sh(6), sh(12), sh(13), sh(14), sh(15) }, + }, +}; - int transposeIdx = 0; +template<X86_VEXT vext> +static void simdFilter7x7Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc, + const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet, + const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight, + int vbPos) +{ + CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2"); + CHECK(isChroma(compId), "7x7 ALF filter is meant for luma only"); - const int clsSizeY = 4; - const int clsSizeX = 4; - bool pcmFlags2x2[4] = {0,0,0,0}; - Pel pcmRec2x2[16]; + const CPelBuf srcBuffer = recSrc.get(compId); + PelBuf dstBuffer = recDst.get(compId); - CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" ); - CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" ); - CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" ); - CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" ); + const size_t srcStride = srcBuffer.stride; + const size_t dstStride = dstBuffer.stride; - const Pel* imgYRec = srcExt; + constexpr int SHIFT = AdaptiveLoopFilter::m_NUM_BITS - 1; + constexpr int ROUND = 1 << (SHIFT - 1); - Pel *pRec; - AlfClassifier *pClass = nullptr; + const size_t width = blk.width; + const size_t height = blk.height; - int dstStride2 = dstStride * clsSizeY; - int srcStride2 = srcStride * clsSizeY; + constexpr size_t STEP_X = 8; + constexpr size_t STEP_Y = 4; - const __m128i mmOffset = _mm_set1_epi32( offset ); - const 
__m128i mmMin = _mm_set1_epi32( clpRng.min ); - const __m128i mmMax = _mm_set1_epi32( clpRng.max ); + CHECK(blk.y % STEP_Y, "Wrong startHeight in filtering"); + CHECK(blk.x % STEP_X, "Wrong startWidth in filtering"); + CHECK(height % STEP_Y, "Wrong endHeight in filtering"); + CHECK(width % STEP_X, "Wrong endWidth in filtering"); - const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask35 ); + const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x; + Pel * dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x; - pImgYPad0 = imgYRec + startHeight * srcStride + startWidth; - pImgYPad1 = pImgYPad0 + srcStride; - pImgYPad2 = pImgYPad0 - srcStride; - pImgYPad3 = pImgYPad1 + srcStride; - pImgYPad4 = pImgYPad2 - srcStride; - pImgYPad5 = pImgYPad3 + srcStride; - pImgYPad6 = pImgYPad4 - srcStride; + const __m128i mmOffset = _mm_set1_epi32(ROUND); + const __m128i mmMin = _mm_set1_epi16( clpRng.min ); + const __m128i mmMax = _mm_set1_epi16( clpRng.max ); - pRec = imgYRecPost + startWidth; - for( int i = 0; i < endHeight - startHeight; i += 4 ) + for (size_t i = 0; i < height; i += STEP_Y) { - pRec = imgYRecPost + startWidth + i * dstStride; + const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x; - if( !bChroma ) + for (size_t j = 0; j < width; j += STEP_X) { - pClass = classifier[startHeight + i] + startWidth; - } + __m128i params[2][2][6]; - for( int j = 0; j < endWidth - startWidth; j += 4 ) - { - if( !bChroma ) + for (int k = 0; k < 2; ++k) { - AlfClassifier& cl = pClass[j]; - transposeIdx = cl.transposeIdx; - if ( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX ) - { - pRec += 4; - continue; - } - coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF; + const AlfClassifier &cl = pClass[j + 4 * k]; + + const int transposeIdx = cl.transposeIdx; + const int classIdx = cl.classIdx; + + static_assert(sizeof(*filterSet) == 2, "ALF coeffs must be 16-bit wide"); + 
static_assert(sizeof(*fClipSet) == 2, "ALF clip values must be 16-bit wide"); + + __m128i rawCoeff0, rawCoeff1; + __m128i rawClip0, rawClip1; + + rawCoeff0 = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF)); + rawCoeff1 = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8)); + + rawClip0 = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF)); + rawClip1 = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8)); + + const __m128i s0 = _mm_loadu_si128((const __m128i *) shuffleTab[transposeIdx][0]); + const __m128i s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80)); + const __m128i s2 = _mm_loadu_si128((const __m128i *) shuffleTab[transposeIdx][1]); + const __m128i s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80)); + + const __m128i rawCoeffLo = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s0), _mm_shuffle_epi8(rawCoeff1, s1)); + const __m128i rawCoeffHi = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s2), _mm_shuffle_epi8(rawCoeff1, s3)); + const __m128i rawClipLo = _mm_or_si128(_mm_shuffle_epi8(rawClip0, s0), _mm_shuffle_epi8(rawClip1, s1)); + const __m128i rawClipHi = _mm_or_si128(_mm_shuffle_epi8(rawClip0, s2), _mm_shuffle_epi8(rawClip1, s3)); + + params[k][0][0] = _mm_shuffle_epi32(rawCoeffLo, 0x00); + params[k][0][1] = _mm_shuffle_epi32(rawCoeffLo, 0x55); + params[k][0][2] = _mm_shuffle_epi32(rawCoeffLo, 0xaa); + params[k][0][3] = _mm_shuffle_epi32(rawCoeffLo, 0xff); + params[k][0][4] = _mm_shuffle_epi32(rawCoeffHi, 0x00); + params[k][0][5] = _mm_shuffle_epi32(rawCoeffHi, 0x55); + params[k][1][0] = _mm_shuffle_epi32(rawClipLo, 0x00); + params[k][1][1] = _mm_shuffle_epi32(rawClipLo, 0x55); + params[k][1][2] = _mm_shuffle_epi32(rawClipLo, 0xaa); + params[k][1][3] = _mm_shuffle_epi32(rawClipLo, 0xff); + params[k][1][4] = _mm_shuffle_epi32(rawClipHi, 0x00); + params[k][1][5] = _mm_shuffle_epi32(rawClipHi, 0x55); } - else if ( isPCMFilterDisabled ) - 
{ - int blkX, blkY; - bool *flags = pcmFlags2x2; - Pel *pcmRec = pcmRec2x2; - // check which chroma 2x2 blocks use PCM - // chroma PCM may not be aligned with 4x4 ALF processing grid - for( blkY=0; blkY<4; blkY+=2 ) + for (size_t ii = 0; ii < STEP_Y; ii++) + { + const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6; + + pImg0 = src + j + ii * srcStride; + pImg1 = pImg0 + srcStride; + pImg2 = pImg0 - srcStride; + pImg3 = pImg1 + srcStride; + pImg4 = pImg2 - srcStride; + pImg5 = pImg3 + srcStride; + pImg6 = pImg4 - srcStride; + + const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1); + if (yVb < vbPos && (yVb >= vbPos - 4)) // above { - for( blkX=0; blkX<4; blkX+=2 ) - { - Position pos(j+startWidth+blkX, i+startHeight+blkY); - CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L); - *flags++ = cu->ipcm ? 1 : 0; - - // save original samples from 2x2 PCM blocks - if( cu->ipcm ) - { - *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+0)]; - *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+1)]; - *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+0)]; - *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+1)]; - } - } - } + pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1; + pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3; + pImg5 = (yVb >= vbPos - 3) ? pImg3 : pImg5; - // skip entire 4x4 if all chroma 2x2 blocks use PCM - if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] ) + pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2; + pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4; + pImg6 = (yVb >= vbPos - 3) ? pImg4 : pImg6; + } + else if (yVb >= vbPos && (yVb <= vbPos + 3)) // bottom { - pRec += 4; - continue; + pImg2 = (yVb == vbPos) ? pImg0 : pImg2; + pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4; + pImg6 = (yVb <= vbPos + 2) ? pImg4 : pImg6; + + pImg1 = (yVb == vbPos) ? pImg0 : pImg1; + pImg3 = (yVb <= vbPos + 1) ? pImg1 : pImg3; + pImg5 = (yVb <= vbPos + 2) ? 
pImg3 : pImg5; } - } + __m128i cur = _mm_loadu_si128((const __m128i *) pImg0); - __m128i c0, c2, t1, t2; + __m128i accumA = mmOffset; + __m128i accumB = mmOffset; - t1 = _mm_loadu_si128( ( __m128i* )( coef + 0 ) ); - t2 = _mm_loadu_si128( ( __m128i* )( coef + 1 ) ); - c2 = _mm_loadu_si128( ( __m128i* )( coef + 4 - 3 ) ); - c0 = _mm_loadu_si128( ( __m128i* )( coef + 9 - 1 ) ); + auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) { + const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur); + const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur); + const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur); + const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur); - c0 = _mm_blend_epi16( c0, t1, 0x01 ); - c2 = _mm_blend_epi16( c2, t2, 0x07 ); + __m128i val01A = _mm_unpacklo_epi16(val00, val10); + __m128i val01B = _mm_unpackhi_epi16(val00, val10); + __m128i val01C = _mm_unpacklo_epi16(val01, val11); + __m128i val01D = _mm_unpackhi_epi16(val01, val11); - if( transposeIdx & 1 ) - { - t1 = _mm_loadu_si128( ( __m128i* )mask00 ); - t2 = _mm_loadu_si128( ( __m128i* )mask02 ); - __m128i t3 = _mm_loadu_si128( ( __m128i* )mask20 ); - __m128i t4 = _mm_loadu_si128( ( __m128i* )mask22 ); - - t1 = _mm_shuffle_epi8( c0, t1 ); - t2 = _mm_shuffle_epi8( c2, t2 ); - t3 = _mm_shuffle_epi8( c0, t3 ); - t4 = _mm_shuffle_epi8( c2, t4 ); - - c0 = _mm_blend_epi16( t1, t2, 0x6C ); - c2 = _mm_blend_epi16( t4, t3, 0x22 ); - } - else - { - c0 = _mm_shuffle_epi8( c0, _mm_loadu_si128( ( __m128i* )mask0 ) ); - } + __m128i limit01A = params[0][1][i]; + __m128i limit01B = params[1][1][i]; - if( transposeIdx == 0 || transposeIdx == 3 ) - { - c2 = _mm_shuffle_epi8( c2, xmm10 ); - } + val01A = _mm_min_epi16(val01A, limit01A); + val01B = _mm_min_epi16(val01B, limit01B); + val01C = _mm_min_epi16(val01C, limit01A); + val01D = _mm_min_epi16(val01D, 
limit01B); - pImg0 = pImgYPad0 + j; - pImg1 = pImgYPad1 + j; - pImg2 = pImgYPad2 + j; - pImg3 = pImgYPad3 + j; - pImg4 = pImgYPad4 + j; - pImg5 = pImgYPad5 + j; - pImg6 = pImgYPad6 + j; + limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A); + limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B); - for( int k = 0; k < 4; k++ ) - { - __m128i xmm6 = _mm_lddqu_si128( ( __m128i* ) pImg6 ); - __m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 - 1 ) ); - __m128i xmm2 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 2 ) ); - __m128i xmm0 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 3 ) ); - __m128i xmm11 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 5 ) ); - __m128i xmm1 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 2 - 1 ) ); - __m128i xmm8 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 5 ) ); - __m128i xmm3 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 - 2 ) ); - __m128i xmm5 = _mm_lddqu_si128( ( __m128i* ) ( pImg5 - 1 ) ); - - xmm6 = _mm_add_epi16( xmm6, _mm_srli_si128( xmm5, 2 ) ); - - __m128i xmm12 = _mm_blend_epi16( _mm_slli_si128( xmm0, 2 ), xmm6, 0x01 ); - __m128i xmm13 = _mm_blend_epi16( xmm0, _mm_srli_si128( xmm6, 2 ), 0x01 ); - - __m128i xmm14 = _mm_blend_epi16( _mm_slli_si128( xmm2, 6 ), xmm4, 0x07 ); - __m128i xmm16 = _mm_blend_epi16( _mm_slli_si128( xmm1, 4 ), _mm_srli_si128( xmm3, 2 ), 0x07 ); - xmm14 = _mm_shuffle_epi8( xmm14, xmm10 ); - xmm14 = _mm_add_epi16( xmm14, xmm16 ); - __m128i xmm15 = _mm_blend_epi16( _mm_slli_si128( xmm2, 4 ), _mm_srli_si128( xmm4, 2 ), 0x07 ); - __m128i xmm17 = _mm_blend_epi16( _mm_slli_si128( xmm1, 2 ), _mm_srli_si128( xmm3, 4 ), 0x07 ); - xmm15 = _mm_shuffle_epi8( xmm15, xmm10 ); - xmm15 = _mm_add_epi16( xmm15, xmm17 ); - - xmm12 = _mm_madd_epi16( xmm12, c0 ); - xmm13 = _mm_madd_epi16( xmm13, c0 ); - xmm14 = _mm_madd_epi16( xmm14, c2 ); - xmm15 = _mm_madd_epi16( xmm15, c2 ); - - xmm12 = _mm_add_epi32( xmm12, xmm14 ); - xmm13 = _mm_add_epi32( xmm13, xmm15 ); - xmm14 = _mm_shuffle_epi32( xmm12, 0x1B ); - xmm15 = _mm_shuffle_epi32( xmm13, 0x1B ); - 
xmm12 = _mm_add_epi32( xmm12, xmm14 ); - xmm13 = _mm_add_epi32( xmm13, xmm15 ); - - __m128i xmm7 = _mm_blend_epi16( xmm12, xmm13, 0xF0 ); - - xmm12 = _mm_blend_epi16( _mm_alignr_epi8( xmm11, xmm0, 2 ), _mm_srli_si128( xmm6, 4 ), 0x01 ); - xmm13 = _mm_blend_epi16( _mm_alignr_epi8( xmm11, xmm0, 4 ), _mm_srli_si128( xmm6, 6 ), 0x01 ); - - xmm14 = _mm_blend_epi16( _mm_slli_si128( xmm2, 2 ), _mm_srli_si128( xmm4, 4 ), 0x07 ); - xmm16 = _mm_blend_epi16( xmm1, _mm_srli_si128( xmm3, 6 ), 0x07 ); - xmm14 = _mm_shuffle_epi8( xmm14, xmm10 ); - xmm14 = _mm_add_epi16( xmm14, xmm16 ); - xmm15 = _mm_blend_epi16( xmm2, _mm_srli_si128( xmm4, 6 ), 0x07 ); - xmm8 = _mm_alignr_epi8( xmm8, xmm1, 2 ); - xmm17 = _mm_blend_epi16( xmm8, _mm_srli_si128( xmm3, 8 ), 0x07 ); - xmm15 = _mm_shuffle_epi8( xmm15, xmm10 ); - xmm15 = _mm_add_epi16( xmm15, xmm17 ); - - xmm12 = _mm_madd_epi16( xmm12, c0 ); - xmm13 = _mm_madd_epi16( xmm13, c0 ); - xmm14 = _mm_madd_epi16( xmm14, c2 ); - xmm15 = _mm_madd_epi16( xmm15, c2 ); - - xmm12 = _mm_add_epi32( xmm12, xmm14 ); - xmm13 = _mm_add_epi32( xmm13, xmm15 ); - xmm14 = _mm_shuffle_epi32( xmm12, 0x1B ); - xmm15 = _mm_shuffle_epi32( xmm13, 0x1B ); - xmm12 = _mm_add_epi32( xmm12, xmm14 ); - xmm13 = _mm_add_epi32( xmm13, xmm15 ); - - __m128i xmm9 = _mm_blend_epi16( xmm12, xmm13, 0xF0 ); - - xmm12 = _mm_hadd_epi32( xmm7, xmm9 ); - - xmm12 = _mm_add_epi32( xmm12, mmOffset ); - xmm12 = _mm_srai_epi32( xmm12, numBitsMinus1 ); - - xmm12 = _mm_min_epi32( mmMax, _mm_max_epi32( xmm12, mmMin ) ); - - xmm12 = _mm_packus_epi32( xmm12, xmm12 ); - - _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 ); - - pRec += dstStride; - - pImg0 += srcStride; - pImg1 += srcStride; - pImg2 += srcStride; - pImg3 += srcStride; - pImg4 += srcStride; - pImg5 += srcStride; - pImg6 += srcStride; - } + val01A = _mm_max_epi16(val01A, limit01A); + val01B = _mm_max_epi16(val01B, limit01B); + val01C = _mm_max_epi16(val01C, limit01A); + val01D = _mm_max_epi16(val01D, limit01B); - pRec -= ( 4 * 
dstStride ); + val01A = _mm_add_epi16(val01A, val01C); + val01B = _mm_add_epi16(val01B, val01D); - // restore 2x2 PCM chroma blocks - if( bChroma && isPCMFilterDisabled ) - { - int blkX, blkY; - bool *flags = pcmFlags2x2; - Pel *pcmRec = pcmRec2x2; - for( blkY=0; blkY<4; blkY+=2 ) + const __m128i coeff01A = params[0][0][i]; + const __m128i coeff01B = params[1][0][i]; + + accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A)); + accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B)); + }; + + + process2coeffs(0, pImg5 + 0, pImg6 + 0, pImg3 + 1, pImg4 - 1); + process2coeffs(1, pImg3 + 0, pImg4 + 0, pImg3 - 1, pImg4 + 1); + process2coeffs(2, pImg1 + 2, pImg2 - 2, pImg1 + 1, pImg2 - 1); + process2coeffs(3, pImg1 + 0, pImg2 + 0, pImg1 - 1, pImg2 + 1); + process2coeffs(4, pImg1 - 2, pImg2 + 2, pImg0 + 3, pImg0 - 3); + process2coeffs(5, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1); + + +#if JVET_Q0150 + bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1); + bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos); + if (!(isNearVBabove || isNearVBbelow)) { - for( blkX=0; blkX<4; blkX+=2 ) - { - if( *flags++ ) - { - pRec[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++; - pRec[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++; - pRec[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++; - pRec[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++; - } - } + accumA = _mm_srai_epi32(accumA, SHIFT); + accumB = _mm_srai_epi32(accumB, SHIFT); } - } + else + { + accumA = _mm_srai_epi32(accumA, SHIFT + 3); + accumB = _mm_srai_epi32(accumB, SHIFT + 3); + } +#else + accumA = _mm_srai_epi32(accumA, SHIFT); + accumB = _mm_srai_epi32(accumB, SHIFT); +#endif + accumA = _mm_packs_epi32(accumA, accumB); + accumA = _mm_add_epi16(accumA, cur); + accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin)); - pRec += 4; + _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accumA); + } } - pRec += dstStride2; - - pImgYPad0 += srcStride2; - pImgYPad1 += srcStride2; - pImgYPad2 += srcStride2; - pImgYPad3 
+= srcStride2; - pImgYPad4 += srcStride2; - pImgYPad5 += srcStride2; - pImgYPad6 += srcStride2; + src += srcStride * STEP_Y; + dst += dstStride * STEP_Y; } } @@ -918,5 +690,4 @@ void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86() } template void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86<SIMDX86>(); -#endif //#ifdef TARGET_SIMD_X86 -//! \} +#endif // TARGET_SIMD_X86 diff --git a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h index b49d703b9113dabc67a7c9d0637da9035a8cb3f7..bc8676e258c6090ce4577931d3aa1ac4e451c729 100644 --- a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h +++ b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index 00399014c56ec3d9e68b860ab55618e549f06238..c763a2977ca6fb745ea905d289bed80fba69787b 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ #include "CommonDefX86.h" #include "CommonLib/Unit.h" #include "CommonLib/Buffer.h" - +#include "CommonLib/InterpolationFilter.h" #if ENABLE_SIMD_OPT_BUFFER #ifdef TARGET_SIMD_X86 @@ -53,44 +53,33 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s { if( W == 8 ) { - // TODO: AVX2 impl - { - __m128i vzero = _mm_setzero_si128(); - __m128i voffset = _mm_set1_epi32( offset ); - __m128i vibdimin = _mm_set1_epi16( clpRng.min ); - __m128i vibdimax = _mm_set1_epi16( clpRng.max ); + CHECK(offset & 1, "offset must be even"); + CHECK(offset < -32768 || offset > 32767, "offset must be a 16-bit value"); - for( int row = 0; row < height; row++ ) - { - for( int col = 0; col < width; col += 8 ) - { - __m128i vsrc0 = _mm_loadu_si128( ( const __m128i * )&src0[col] ); - __m128i vsrc1 = _mm_loadu_si128( ( const __m128i * )&src1[col] ); + __m128i vibdimin = _mm_set1_epi16(clpRng.min); + __m128i vibdimax = _mm_set1_epi16(clpRng.max); - __m128i vtmp, vsum, vdst; - vsum = _mm_cvtepi16_epi32 ( vsrc0 ); - vdst = _mm_cvtepi16_epi32 ( vsrc1 ); - vsum = _mm_add_epi32 ( vsum, vdst ); - vsum = _mm_add_epi32 ( vsum, voffset ); - vtmp = _mm_srai_epi32 ( vsum, shift ); - - vsrc0 = _mm_unpackhi_epi64 ( vsrc0, vzero ); - vsrc1 = _mm_unpackhi_epi64 ( vsrc1, vzero ); - vsum = _mm_cvtepi16_epi32 ( vsrc0 ); - vdst = _mm_cvtepi16_epi32 ( vsrc1 ); - vsum = _mm_add_epi32 ( vsum, vdst ); - vsum = _mm_add_epi32 ( vsum, voffset ); - vsum = _mm_srai_epi32 ( vsum, shift ); - vsum = _mm_packs_epi32 ( vtmp, vsum ); - - vsum = _mm_min_epi16( vibdimax, _mm_max_epi16( vibdimin, vsum ) ); - _mm_storeu_si128( ( __m128i * )&dst[col], vsum ); - } - - src0 += src0Stride; - src1 += src1Stride; - dst += dstStride; + for (int row = 0; row < height; row++) + { + for (int col = 0; col < width; col += 8) + { + __m128i vsrc0 = _mm_loadu_si128((const __m128i *) &src0[col]); + __m128i vsrc1 = 
_mm_loadu_si128((const __m128i *) &src1[col]); + + vsrc0 = _mm_xor_si128(vsrc0, _mm_set1_epi16(0x7fff)); + vsrc1 = _mm_xor_si128(vsrc1, _mm_set1_epi16(0x7fff)); + vsrc0 = _mm_avg_epu16(vsrc0, vsrc1); + vsrc0 = _mm_xor_si128(vsrc0, _mm_set1_epi16(0x7fff)); + vsrc0 = _mm_adds_epi16(vsrc0, _mm_set1_epi16(offset >> 1)); + vsrc0 = _mm_sra_epi16(vsrc0, _mm_cvtsi32_si128(shift - 1)); + vsrc0 = _mm_max_epi16(vsrc0, vibdimin); + vsrc0 = _mm_min_epi16(vsrc0, vibdimax); + _mm_storeu_si128((__m128i *) &dst[col], vsrc0); } + + src0 += src0Stride; + src1 += src1Stride; + dst += dstStride; } } else if( W == 4 ) @@ -131,126 +120,118 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s template<X86_VEXT vext> void copyBufferSimd(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height) { - __m128i x; -#ifdef USE_AVX2 - __m256i x16; -#endif - int j, temp; - for (int i = 0; i < height; i++) + if (width < 8) { - j = 0; - temp = width; -#ifdef USE_AVX2 - while ((temp >> 4) > 0) - { - x16 = _mm256_loadu_si256((const __m256i*)(&src[i * srcStride + j])); - _mm256_storeu_si256((__m256i*)(&dst[i * dstStride + j]), x16); - j += 16; - temp -= 16; - } -#endif - while ((temp >> 3) > 0) - { - x = _mm_loadu_si128((const __m128i*)(&src[ i * srcStride + j])); - _mm_storeu_si128((__m128i*)(&dst[ i * dstStride + j]), x); - j += 8; - temp -= 8; - } - while ((temp >> 2) > 0) + CHECK(width < 4, "width must be at least 4"); + + for (size_t x = 0; x < width; x += 4) { - x = _mm_loadl_epi64((const __m128i*)(&src[i * srcStride + j])); - _mm_storel_epi64((__m128i*)(&dst[i*dstStride + j]), x); - j += 4; - temp -= 4; + if (x > width - 4) + x = width - 4; + for (size_t y = 0; y < height; y++) + { + __m128i val = _mm_loadl_epi64((const __m128i *) (src + y * srcStride + x)); + _mm_storel_epi64((__m128i *) (dst + y * dstStride + x), val); + } } - while (temp > 0) + } + else + { + for (size_t x = 0; x < width; x += 8) { - dst[i * dstStride + j] = src[i * srcStride + j]; 
- j++; - temp--; + if (x > width - 8) + x = width - 8; + for (size_t y = 0; y < height; y++) + { + __m128i val = _mm_loadu_si128((const __m128i *) (src + y * srcStride + x)); + _mm_storeu_si128((__m128i *) (dst + y * dstStride + x), val); + } } } } - template<X86_VEXT vext> void paddingSimd(Pel *dst, int stride, int width, int height, int padSize) { - __m128i x; -#ifdef USE_AVX2 - __m256i x16; -#endif - int temp, j; - for (int i = 1; i <= padSize; i++) + size_t extWidth = width + 2 * padSize; + CHECK(extWidth < 8, "width plus 2 times padding size must be at least 8"); + + if (padSize == 1) { - j = 0; - temp = width; -#ifdef USE_AVX2 - while ((temp >> 4) > 0) + for (size_t i = 0; i < height; i++) { + Pel left = dst[i * stride]; + Pel right = dst[i * stride + width - 1]; + dst[i * stride - 1] = left; + dst[i * stride + width] = right; + } - x16 = _mm256_loadu_si256((const __m256i*)(&(dst[j]))); - _mm256_storeu_si256((__m256i*)(dst + j - i*stride), x16); - x16 = _mm256_loadu_si256((const __m256i*)(dst + j + (height - 1)*stride)); - _mm256_storeu_si256((__m256i*)(dst + j + (height - 1 + i)*stride), x16); - + dst -= 1; - j = j + 16; - temp = temp - 16; - } -#endif - while ((temp >> 3) > 0) + for (size_t i = 0; i < extWidth - 8; i++) { + __m128i top = _mm_loadu_si128((const __m128i *) (dst + i)); + _mm_storeu_si128((__m128i *) (dst - stride + i), top); + } + __m128i top = _mm_loadu_si128((const __m128i *) (dst + extWidth - 8)); + _mm_storeu_si128((__m128i *) (dst - stride + extWidth - 8), top); - x = _mm_loadu_si128((const __m128i*)(&(dst[j]))); - _mm_storeu_si128((__m128i*)(dst + j - i*stride), x); - x = _mm_loadu_si128((const __m128i*)(dst + j + (height - 1)*stride)); - _mm_storeu_si128((__m128i*)(dst + j + (height - 1 + i)*stride), x); + dst += height * stride; - j = j + 8; - temp = temp - 8; - } - while ((temp >> 2) > 0) + for (size_t i = 0; i < extWidth - 8; i++) { - x = _mm_loadl_epi64((const __m128i*)(&dst[j])); - _mm_storel_epi64((__m128i*)(dst + j - i*stride), 
x); - x = _mm_loadl_epi64((const __m128i*)(dst + j + (height - 1)*stride)); - _mm_storel_epi64((__m128i*)(dst + j + (height - 1 + i)*stride), x); - - j = j + 4; - temp = temp - 4; + __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + i)); + _mm_storeu_si128((__m128i *) (dst + i), bottom); } - while (temp > 0) + __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + extWidth - 8)); + _mm_storeu_si128((__m128i *) (dst + extWidth - 8), bottom); + } + else if (padSize == 2) + { + for (size_t i = 0; i < height; i++) { - dst[j - i*stride] = dst[j]; - dst[j + (height - 1 + i)*stride] = dst[j + (height - 1)*stride]; - j++; - temp--; + Pel left = dst[i * stride]; + Pel right = dst[i * stride + width - 1]; + dst[i * stride - 2] = left; + dst[i * stride - 1] = left; + dst[i * stride + width] = right; + dst[i * stride + width + 1] = right; } - } + dst -= 2; - //Left and Right Padding - Pel* ptr1 = dst - padSize*stride; - Pel* ptr2 = dst - padSize*stride + width - 1; - int offset = 0; - for (int i = 0; i < height + 2 * padSize; i++) - { - offset = stride * i; - for (int j = 1; j <= padSize; j++) + for (size_t i = 0; i < extWidth - 8; i++) { - *(ptr1 - j + offset) = *(ptr1 + offset); - *(ptr2 + j + offset) = *(ptr2 + offset); + __m128i top = _mm_loadu_si128((const __m128i *) (dst + i)); + _mm_storeu_si128((__m128i *) (dst - 2 * stride + i), top); + _mm_storeu_si128((__m128i *) (dst - stride + i), top); } + __m128i top = _mm_loadu_si128((const __m128i *) (dst + extWidth - 8)); + _mm_storeu_si128((__m128i *) (dst - 2 * stride + extWidth - 8), top); + _mm_storeu_si128((__m128i *) (dst - stride + extWidth - 8), top); + + dst += height * stride; + for (size_t i = 0; i < extWidth - 8; i++) + { + __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + i)); + _mm_storeu_si128((__m128i *) (dst + i), bottom); + _mm_storeu_si128((__m128i *) (dst + stride + i), bottom); + } + __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + extWidth - 
8)); + _mm_storeu_si128((__m128i *) (dst + extWidth - 8), bottom); + _mm_storeu_si128((__m128i *) (dst + stride + extWidth - 8), bottom); + } + else + { + CHECK(false, "padding size must be 1 or 2"); } } + template< X86_VEXT vext > void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng) { - __m128i mm_tmpx = _mm_unpacklo_epi64(_mm_set1_epi16(tmpx), _mm_set1_epi16(tmpy)); - __m128i mm_boffset = _mm_set1_epi32(1); - __m128i mm_offset = _mm_set1_epi32(offset); + __m128i c = _mm_unpacklo_epi16(_mm_set1_epi16(tmpx), _mm_set1_epi16(tmpy)); __m128i vibdimin = _mm_set1_epi16(clpRng.min); __m128i vibdimax = _mm_set1_epi16(clpRng.max); @@ -258,20 +239,22 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St { for (int x = 0; x < width; x += 4) { - __m128i mm_a = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(gradX0 + x)), _mm_loadl_epi64((const __m128i *)(gradY0 + x))); - __m128i mm_b = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(gradX1 + x)), _mm_loadl_epi64((const __m128i *)(gradY1 + x))); - mm_a = _mm_sub_epi16(mm_a, mm_b); - mm_b = _mm_mulhi_epi16(mm_a, mm_tmpx); - mm_a = _mm_mullo_epi16(mm_a, mm_tmpx); - - __m128i mm_sum = _mm_add_epi32(_mm_unpacklo_epi16(mm_a, mm_b), _mm_unpackhi_epi16(mm_a, mm_b)); - mm_sum = _mm_srai_epi32(_mm_add_epi32(mm_sum, mm_boffset), 1); - mm_a = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src0 + x))); - mm_b = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src1 + x))); - mm_sum = _mm_add_epi32(_mm_add_epi32(mm_sum, mm_a), _mm_add_epi32(mm_b, mm_offset)); - mm_sum = _mm_packs_epi32(_mm_srai_epi32(mm_sum, shift), mm_a); - mm_sum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_sum)); - _mm_storel_epi64((__m128i *)(dst + x), mm_sum); + __m128i a = 
_mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (gradX0 + x)), + _mm_loadl_epi64((const __m128i *) (gradY0 + x))); + __m128i b = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (gradX1 + x)), + _mm_loadl_epi64((const __m128i *) (gradY1 + x))); + a = _mm_sub_epi16(a, b); + __m128i sum = _mm_madd_epi16(a, c); + + a = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)), + _mm_loadl_epi64((const __m128i *) (src1 + x))); + sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(1))); + sum = _mm_add_epi32(sum, _mm_set1_epi32(offset)); + sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift)); + sum = _mm_packs_epi32(sum, sum); + sum = _mm_max_epi16(sum, vibdimin); + sum = _mm_min_epi16(sum, vibdimax); + _mm_storel_epi64((__m128i *) (dst + x), sum); } dst += dstStride; src0 += src0Stride; src1 += src1Stride; gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride; @@ -279,44 +262,343 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St } template< X86_VEXT vext > +void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX) + +{ + int shift4 = 4; + int shift5 = 1; + + __m128i sumAbsGXTmp = _mm_setzero_si128(); + __m128i sumDIXTmp = _mm_setzero_si128(); + __m128i sumAbsGYTmp = _mm_setzero_si128(); + __m128i sumDIYTmp = _mm_setzero_si128(); + __m128i sumSignGyGxTmp = _mm_setzero_si128(); + + for (int y = 0; y < 6; y++) + { + // Note: loading 8 values also works, but valgrind doesn't like it + auto load6values = [](const Pel *ptr) { + __m128i a = _mm_loadl_epi64((const __m128i *) ptr); + __m128i b = _mm_cvtsi32_si128(*(uint32_t *) (ptr + 4)); + return _mm_unpacklo_epi64(a, b); + }; + + __m128i shiftSrcY0Tmp = _mm_srai_epi16(load6values(srcY0Tmp), shift4); + __m128i 
shiftSrcY1Tmp = _mm_srai_epi16(load6values(srcY1Tmp), shift4); + __m128i loadGradX0 = load6values(gradX0); + __m128i loadGradX1 = load6values(gradX1); + __m128i loadGradY0 = load6values(gradY0); + __m128i loadGradY1 = load6values(gradY1); + + __m128i subTemp1 = _mm_sub_epi16(shiftSrcY1Tmp, shiftSrcY0Tmp); + __m128i packTempX = _mm_srai_epi16(_mm_add_epi16(loadGradX0, loadGradX1), shift5); + __m128i packTempY = _mm_srai_epi16(_mm_add_epi16(loadGradY0, loadGradY1), shift5); + __m128i gX = _mm_abs_epi16(packTempX); + __m128i gY = _mm_abs_epi16(packTempY); + __m128i dIX = _mm_sign_epi16(subTemp1, packTempX ); + __m128i dIY = _mm_sign_epi16(subTemp1, packTempY ); + __m128i signGY_GX = _mm_sign_epi16(packTempX, packTempY ); + + sumAbsGXTmp = _mm_add_epi16(sumAbsGXTmp, gX); + sumDIXTmp = _mm_add_epi16(sumDIXTmp, dIX); + sumAbsGYTmp = _mm_add_epi16(sumAbsGYTmp, gY); + sumDIYTmp = _mm_add_epi16(sumDIYTmp, dIY); + sumSignGyGxTmp = _mm_add_epi16(sumSignGyGxTmp, signGY_GX); + srcY0Tmp += src0Stride; + srcY1Tmp += src1Stride; + gradX0 += widthG; + gradX1 += widthG; + gradY0 += widthG; + gradY1 += widthG; + } + + sumAbsGXTmp = _mm_madd_epi16(sumAbsGXTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0)); + sumDIXTmp = _mm_madd_epi16(sumDIXTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0)); + sumAbsGYTmp = _mm_madd_epi16(sumAbsGYTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0)); + sumDIYTmp = _mm_madd_epi16(sumDIYTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0)); + sumSignGyGxTmp = _mm_madd_epi16(sumSignGyGxTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0)); + + __m128i a12 = _mm_unpacklo_epi32(sumAbsGXTmp, sumAbsGYTmp); + __m128i a3 = _mm_unpackhi_epi32(sumAbsGXTmp, sumAbsGYTmp); + __m128i b12 = _mm_unpacklo_epi32(sumDIXTmp, sumDIYTmp); + __m128i b3 = _mm_unpackhi_epi32(sumDIXTmp, sumDIYTmp); + __m128i c1 = _mm_unpacklo_epi64(a12, b12); + __m128i c2 = _mm_unpackhi_epi64(a12, b12); + __m128i c3 = _mm_unpacklo_epi64(a3, b3); + + c1 = _mm_add_epi32(c1, c2); + c1 = _mm_add_epi32(c1, c3); + + *sumAbsGX = 
_mm_cvtsi128_si32(c1); + *sumAbsGY = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0x55)); + *sumDIX = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xaa)); + *sumDIY = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xff)); + + sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0x4e)); // 01001110 + sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0xb1)); // 10110001 + *sumSignGY_GX = _mm_cvtsi128_si32(sumSignGyGxTmp); +} + +template< X86_VEXT vext > +void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng) +{ + CHECKD((width & 3), "block width error!"); + + const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); + +#ifdef USE_AVX2 + __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src; + __m256i mm_offset = _mm256_set1_epi16(offset); + __m256i vibdimin = _mm256_set1_epi16(clpRng.min); + __m256i vibdimax = _mm256_set1_epi16(clpRng.max); + __m256i mm_dimin = _mm256_set1_epi32(-dILimit); + __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1); +#else + __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0; + __m128i mm_offset = _mm_set1_epi16(offset); + __m128i vibdimin = _mm_set1_epi16(clpRng.min); + __m128i vibdimax = _mm_set1_epi16(clpRng.max); + __m128i mm_dimin = _mm_set1_epi32(-dILimit); + __m128i mm_dimax = _mm_set1_epi32(dILimit - 1); +#endif + +#if USE_AVX2 + for (int h = 0; h < height; h += 4) +#else + for (int h = 0; h < height; h += 2) +#endif + { + const int* vX = dMvX; + const int* vY = dMvY; + const Pel* gX = gradX; + const Pel* gY = gradY; + const Pel* src = srcPel; + Pel* dst = dstPel; + + for (int w = 0; w < width; w += 4) + { +#if USE_AVX2 + const int *vX0 = vX, *vY0 = vY; + const Pel *gX0 = gX, *gY0 = gY; + + // first two rows + mm_dmvx = 
_mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1); + mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1); + mm_gradx = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1); + mm_grady = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); + mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); + mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0)); + + // next two rows + vX0 += (dMvStride << 1); vY0 += (dMvStride << 1); gX0 += (gradStride << 1); gY0 += (gradStride << 1); + mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1); + mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1); + mm_gradx = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1); + mm_grady = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))), + _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1); + mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady)); + mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI)); + + // combine four rows + mm_dI = _mm256_packs_epi32(mm_dI0, mm_dI); + const Pel* src0 = src + srcStride; + mm_src = _mm256_inserti128_si256( + 
_mm256_castsi128_si256(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + (srcStride << 1))))), + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src0), _mm_loadl_epi64((const __m128i *)(src0 + (srcStride << 1)))), + 1 + ); + mm_dI = _mm256_add_epi16(mm_dI, mm_src); + if (!bi) + { + mm_dI = _mm256_srai_epi16(_mm256_adds_epi16(mm_dI, mm_offset), shiftNum); + mm_dI = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, mm_dI)); + } + + // store final results + __m128i dITmp = _mm256_extractf128_si256(mm_dI, 1); + Pel* dst0 = dst; + _mm_storel_epi64((__m128i *)dst0, _mm256_castsi256_si128(mm_dI)); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, dITmp); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(_mm256_castsi256_si128(mm_dI), _mm256_castsi256_si128(mm_dI))); + dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(dITmp, dITmp)); +#else + // first row + mm_dmvx = _mm_loadu_si128((const __m128i *)vX); + mm_dmvy = _mm_loadu_si128((const __m128i *)vY); + mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX)); + mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY)); + mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); + mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0)); + + // second row + mm_dmvx = _mm_loadu_si128((const __m128i *)(vX + dMvStride)); + mm_dmvy = _mm_loadu_si128((const __m128i *)(vY + dMvStride)); + mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride))); + mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride))); + mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady)); + mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI)); + + // combine both rows + mm_dI = _mm_packs_epi32(mm_dI0, mm_dI); + mm_dI = _mm_add_epi16(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), 
_mm_loadl_epi64((const __m128i *)(src + srcStride))), mm_dI); + if (!bi) + { + mm_dI = _mm_srai_epi16(_mm_adds_epi16(mm_dI, mm_offset), shiftNum); + mm_dI = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_dI)); + } + + _mm_storel_epi64((__m128i *)dst, mm_dI); + _mm_storel_epi64((__m128i *)(dst + dstStride), _mm_unpackhi_epi64(mm_dI, mm_dI)); +#endif + vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4; + } + +#if USE_AVX2 + dMvX += (dMvStride << 2); + dMvY += (dMvStride << 2); + gradX += (gradStride << 2); + gradY += (gradStride << 2); + srcPel += (srcStride << 2); + dstPel += (dstStride << 2); +#else + dMvX += (dMvStride << 1); + dMvY += (dMvStride << 1); + gradX += (gradStride << 1); + gradY += (gradStride << 1); + srcPel += (srcStride << 1); + dstPel += (dstStride << 1); +#endif + } +} + + +template< X86_VEXT vext > +void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit) +{ + CHECKD(size % 16 != 0, "Size must be multiple of 16!"); +#ifdef USE_AVX512 + if (vext >= AVX512 && size >= 16) + { + __m512i dMvMin = _mm256_set1_epi32(-dmvLimit); + __m512i dMvMax = _mm256_set1_epi32( dmvLimit ); + __m512i nOffset = _mm512_set1_epi32((1 << (nShift - 1))); + __m512i vones = _mm512_set1_epi32(1); + __m512i vzero = _mm512_setzero_si512(); + for (int i = 0; i < size; i += 16, v += 16) + { + __m512i src = _mm512_loadu_si512(v); + __mmask16 mask = _mm512_cmpge_epi32_mask(src, vzero); + src = __mm512_add_epi32(src, nOffset); + __mm512i dst = _mm512_srai_epi32(_mm512_mask_sub_epi32(src, mask, src, vones), nShift); + dst = _mm512_min_epi32(dMvMax, _mm512_max_epi32(dMvMin, dst)); + _mm512_storeu_si512(v, dst); + } + } + else +#endif +#ifdef USE_AVX2 + if (vext >= AVX2 && size >= 8) + { + __m256i dMvMin = _mm256_set1_epi32(-dmvLimit); + __m256i dMvMax = _mm256_set1_epi32( dmvLimit ); + __m256i nOffset = _mm256_set1_epi32(1 << (nShift - 1)); + __m256i vzero = _mm256_setzero_si256(); + for (int i = 0; i < size; i += 8, v += 8) + { + __m256i src = 
_mm256_lddqu_si256((__m256i*)v); + __m256i of = _mm256_cmpgt_epi32(src, vzero); + __m256i dst = _mm256_srai_epi32(_mm256_add_epi32(_mm256_add_epi32(src, nOffset), of), nShift); + dst = _mm256_min_epi32(dMvMax, _mm256_max_epi32(dMvMin, dst)); + _mm256_storeu_si256((__m256i*)v, dst); + } + } + else +#endif + { + __m128i dMvMin = _mm_set1_epi32(-dmvLimit); + __m128i dMvMax = _mm_set1_epi32( dmvLimit ); + __m128i nOffset = _mm_set1_epi32((1 << (nShift - 1))); + __m128i vzero = _mm_setzero_si128(); + for (int i = 0; i < size; i += 4, v += 4) + { + __m128i src = _mm_loadu_si128((__m128i*)v); + __m128i of = _mm_cmpgt_epi32(src, vzero); + __m128i dst = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(src, nOffset), of), nShift); + dst = _mm_min_epi32(dMvMax, _mm_max_epi32(dMvMin, dst)); + _mm_storeu_si128((__m128i*)v, dst); + } + } +} + +template< X86_VEXT vext, bool PAD = true> void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) { - __m128i vzero = _mm_setzero_si128(); Pel* srcTmp = src + srcStride + 1; Pel* gradXTmp = gradX + gradStride + 1; Pel* gradYTmp = gradY + gradStride + 1; int widthInside = width - 2 * BIO_EXTEND_SIZE; int heightInside = height - 2 * BIO_EXTEND_SIZE; - int shift1 = std::max<int>(2, (14 - bitDepth)); + int shift1 = 6; + __m128i mmShift1 = _mm_cvtsi32_si128( shift1 ); + assert((widthInside & 3) == 0); + if ( ( widthInside & 7 ) == 0 ) + { + for (int y = 0; y < heightInside; y++) + { + int x = 0; + for ( ; x < widthInside; x += 8 ) + { + __m128i mmPixTop = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - srcStride ) ), mmShift1 ); + __m128i mmPixBottom = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + srcStride ) ), mmShift1 ); + __m128i mmPixLeft = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - 1 ) ), mmShift1 ); + __m128i mmPixRight = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + 1 ) ), mmShift1 ); - assert((widthInside & 3) == 
0); + __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop ); + __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft ); - for (int y = 0; y < heightInside; y++) + _mm_storeu_si128( ( __m128i * ) ( gradYTmp + x ), mmGradVer ); + _mm_storeu_si128( ( __m128i * ) ( gradXTmp + x ), mmGradHor ); + } + gradXTmp += gradStride; + gradYTmp += gradStride; + srcTmp += srcStride; + } + } + else { - int x = 0; - for (; x < widthInside; x += 4) + __m128i mmPixTop = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp ) ) ), mmShift1 ); + for ( int y = 0; y < heightInside; y += 2 ) { - __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride))); - __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride))); - __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1))); - __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1))); - - __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1)); - __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1)); - mmGradVer = _mm_packs_epi32(mmGradVer, vzero); - mmGradHor = _mm_packs_epi32(mmGradHor, vzero); - - _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer); - _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor); + __m128i mmPixBottom = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + ( srcStride << 1 ) ) ) ), mmShift1 ); + __m128i mmPixLeft = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 + srcStride ) ) ), mmShift1 ); + __m128i mmPixRight = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 + srcStride ) ) ), mmShift1 ); + + __m128i 
mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop ); + __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft ); + + _mm_storel_epi64( (__m128i *) gradYTmp, mmGradVer ); + _mm_storel_epi64( (__m128i *) ( gradYTmp + gradStride ), _mm_unpackhi_epi64( mmGradVer, mmGradHor ) ); + _mm_storel_epi64( (__m128i *) gradXTmp, mmGradHor ); + _mm_storel_epi64( (__m128i *) ( gradXTmp + gradStride ), _mm_unpackhi_epi64( mmGradHor, mmGradVer ) ); + + mmPixTop = mmPixBottom; + gradXTmp += gradStride << 1; + gradYTmp += gradStride << 1; + srcTmp += srcStride << 1; } - - gradXTmp += gradStride; - gradYTmp += gradStride; - srcTmp += srcStride; } + if (PAD) + { gradXTmp = gradX + gradStride + 1; gradYTmp = gradY + gradStride + 1; for (int y = 0; y < heightInside; y++) @@ -336,155 +618,10 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri ::memcpy(gradXTmp + heightInside*gradStride, gradXTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width)); ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width)); ::memcpy(gradYTmp + heightInside*gradStride, gradYTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width)); -} - -template< X86_VEXT vext > -void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) -{ - int shift4 = std::min<int>(8, (bitDepth - 4)); - int shift5 = std::min<int>(5, (bitDepth - 7)); - for (int y = 0; y < heightG; y++) - { - int x = 0; - for (; x < ((widthG >> 3) << 3); x += 8) - { - __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4)); - __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4)); - __m128i mmGradX0 = 
_mm_loadu_si128((__m128i*)(gradX0 + x)); - __m128i mmGradX1 = _mm_loadu_si128((__m128i*)(gradX1 + x)); - __m128i mmGradY0 = _mm_loadu_si128((__m128i*)(gradY0 + x)); - __m128i mmGradY1 = _mm_loadu_si128((__m128i*)(gradY1 + x)); - - __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp); - __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5)); - __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5)); - - // m_piDotProductTemp1 - __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX); - __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX); - - __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - __m128i mm_h = _mm_unpackhi_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp1 + x), mm_l); - _mm_storeu_si128((__m128i *)(dotProductTemp1 + x + 4), mm_h); - - // m_piDotProductTemp2 - mm_b = _mm_mulhi_epi16(mmTempX, mmTempY); - mm_a = _mm_mullo_epi16(mmTempX, mmTempY); - - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - mm_h = _mm_unpackhi_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp2 + x), mm_l); - _mm_storeu_si128((__m128i *)(dotProductTemp2 + x + 4), mm_h); - - // m_piDotProductTemp3 - mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1); - mm_a = _mm_mullo_epi16(mmTempX, mmTemp1); - - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - mm_h = _mm_unpackhi_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp3 + x), mm_l); - _mm_storeu_si128((__m128i *)(dotProductTemp3 + x + 4), mm_h); - - // m_piDotProductTemp5 - mm_b = _mm_mulhi_epi16(mmTempY, mmTempY); - mm_a = _mm_mullo_epi16(mmTempY, mmTempY); - - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - mm_h = _mm_unpackhi_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp5 + x), mm_l); - _mm_storeu_si128((__m128i *)(dotProductTemp5 + x + 4), mm_h); - - // m_piDotProductTemp6 - mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1); - mm_a = _mm_mullo_epi16(mmTempY, mmTemp1); - - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - mm_h = 
_mm_unpackhi_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp6 + x), mm_l); - _mm_storeu_si128((__m128i *)(dotProductTemp6 + x + 4), mm_h); - } - - for (; x < ((widthG >> 2) << 2); x += 4) - { - __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4)); - __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4)); - __m128i mmGradX0 = _mm_loadl_epi64((__m128i*)(gradX0 + x)); - __m128i mmGradX1 = _mm_loadl_epi64((__m128i*)(gradX1 + x)); - __m128i mmGradY0 = _mm_loadl_epi64((__m128i*)(gradY0 + x)); - __m128i mmGradY1 = _mm_loadl_epi64((__m128i*)(gradY1 + x)); - - __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp); - __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5)); - __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5)); - - // m_piDotProductTemp1 - __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX); - __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX); - __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp1 + x), mm_l); - - // m_piDotProductTemp2 - mm_b = _mm_mulhi_epi16(mmTempX, mmTempY); - mm_a = _mm_mullo_epi16(mmTempX, mmTempY); - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp2 + x), mm_l); - - // m_piDotProductTemp3 - mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1); - mm_a = _mm_mullo_epi16(mmTempX, mmTemp1); - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp3 + x), mm_l); - - // m_piDotProductTemp5 - mm_b = _mm_mulhi_epi16(mmTempY, mmTempY); - mm_a = _mm_mullo_epi16(mmTempY, mmTempY); - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - - _mm_storeu_si128((__m128i *)(dotProductTemp5 + x), mm_l); - - // m_piDotProductTemp6 - mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1); - mm_a = _mm_mullo_epi16(mmTempY, mmTemp1); - mm_l = _mm_unpacklo_epi16(mm_a, mm_b); - - 
_mm_storeu_si128((__m128i *)(dotProductTemp6 + x), mm_l); - } - - for (; x < widthG; x++) - { - int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4); - int tempX = (gradX0[x] + gradX1[x]) >> shift5; - int tempY = (gradY0[x] + gradY1[x]) >> shift5; - dotProductTemp1[x] = tempX * tempX; - dotProductTemp2[x] = tempX * tempY; - dotProductTemp3[x] = -tempX * temp; - dotProductTemp5[x] = tempY * tempY; - dotProductTemp6[x] = -tempY * temp; - } - - srcY0Temp += src0Stride; - srcY1Temp += src1Stride; - gradX0 += gradStride; - gradX1 += gradStride; - gradY0 += gradStride; - gradY1 += gradStride; - dotProductTemp1 += widthG; - dotProductTemp2 += widthG; - dotProductTemp3 += widthG; - dotProductTemp5 += widthG; - dotProductTemp6 += widthG; } } + template< X86_VEXT vext > void calcBlkGradient_SSE(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize) { @@ -632,13 +769,13 @@ void reco_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src } } -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW template< X86_VEXT vext, int W > -void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height, int shift, int gbiWeight) +void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height, int shift, int bcwWeight) { - int normalizer = ((1 << 16) + (gbiWeight>0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight; - int weight0 = normalizer << g_GbiLog2WeightBase; - int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer; + int normalizer = ((1 << 16) + (bcwWeight>0 ? 
(bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight; + int weight0 = normalizer << g_BcwLog2WeightBase; + int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer; int offset = 1 << (shift - 1); if (W == 8) { @@ -651,8 +788,8 @@ void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1 { for (int col = 0; col < width; col += 8) { - __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); - __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); + __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] ); + __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] ); __m128i vtmp, vdst, vsrc; vdst = _mm_cvtepi16_epi32(vsrc0); @@ -721,8 +858,8 @@ void removeHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int { for (int col = 0; col < width; col += 8) { - __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]); - __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]); + __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] ); + __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] ); vsrc0 = _mm_sub_epi16(_mm_slli_epi16(vsrc0, 1), vsrc1); _mm_store_si128((__m128i *)&src0[col], vsrc0); @@ -919,8 +1056,7 @@ void PelBufferOps::_initPelBufOpsX86() addBIOAvg4 = addBIOAvg4_SSE<vext>; bioGradFilter = gradFilter_SSE<vext>; - calcBIOPar = calcBIOPar_SSE<vext>; - calcBlkGradient = calcBlkGradient_SSE<vext>; + calcBIOSums = calcBIOSums_SSE<vext>; copyBuffer = copyBufferSimd<vext>; padding = paddingSimd<vext>; @@ -929,12 +1065,15 @@ void PelBufferOps::_initPelBufOpsX86() linTf8 = linTf_SSE_entry<vext, 8>; linTf4 = linTf_SSE_entry<vext, 4>; -#if ENABLE_SIMD_OPT_GBI +#if ENABLE_SIMD_OPT_BCW removeWeightHighFreq8 = removeWeightHighFreq_SSE<vext, 8>; removeWeightHighFreq4 = removeWeightHighFreq_SSE<vext, 4>; removeHighFreq8 = removeHighFreq_SSE<vext, 8>; removeHighFreq4 = removeHighFreq_SSE<vext, 4>; #endif + profGradFilter = gradFilter_SSE<vext, false>; + applyPROF = applyPROF_SSE<vext>; + roundIntVector = 
roundIntVector_SIMD<vext>; } template void PelBufferOps::_initPelBufOpsX86<SIMDX86>(); diff --git a/source/Lib/CommonLib/x86/CommonDefX86.cpp b/source/Lib/CommonLib/x86/CommonDefX86.cpp index d7b5f8cd2c4cc106d5eea32b705344c7fd189518..448b627bb7ed2e01a03ef24c94fa66dc0347f904 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.cpp +++ b/source/Lib/CommonLib/x86/CommonDefX86.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/x86/CommonDefX86.h b/source/Lib/CommonLib/x86/CommonDefX86.h index 28091299b667645d0c8781bbdf8b7b037c8b541a..29f90397f8521217176ec94724687d50382e18dc 100644 --- a/source/Lib/CommonLib/x86/CommonDefX86.h +++ b/source/Lib/CommonLib/x86/CommonDefX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/x86/IbcHashMapX86.h b/source/Lib/CommonLib/x86/IbcHashMapX86.h index 67e4c063d06741b1e4d7fe5dd7870326e6da83de..2d0ce5f033d844e8f8865fad0e431ec399c2621d 100644 --- a/source/Lib/CommonLib/x86/IbcHashMapX86.h +++ b/source/Lib/CommonLib/x86/IbcHashMapX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp index 334987013a8315ad5381a103efed5cbf226c448c..458839510c00487d6f61fc80ba13d15197a09de5 100644 --- a/source/Lib/CommonLib/x86/InitX86.cpp +++ b/source/Lib/CommonLib/x86/InitX86.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index 59c455d30fc0a5d7a74e7a3c80226ec10056965d..2b5bda2df04e9ae2626a74e1696069675ad41aa8 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -195,21 +195,6 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid template<X86_VEXT vext, bool isFirst, bool isLast> static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height, bool biMCForDMVR) { -#if !HM_JEM_CLIP_PEL - if( vext >= AVX2 && ( width % 16 ) == 0 ) - { - fullPelCopyAVX2<Pel, 16, isFirst, isLast >( clpRng, src, srcStride, dst, dstStride, width, height ); - } - else if( ( width % 16 ) == 0 ) - { - fullPelCopySSE<Pel, 16, isFirst, isLast >( clpRng, src, srcStride, dst, dstStride, width, height ); - } - else if( ( width % 8 ) == 0 ) - { - fullPelCopySSE<Pel, 8, isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height ); - } - else -#endif { //Scalar InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR); } @@ -1023,6 +1008,110 @@ static inline __m128i simdInterpolateLuma10Bit2P4(int16_t const *src, int srcStr return sumLo; } +#ifdef USE_AVX2 +static inline __m256i simdInterpolateLumaHighBit2P16(int16_t const *src1, int srcStride, __m256i *mmCoeff, const __m256i & mmOffset, __m128i &mmShift) +{ + __m256i mm_mul_lo = _mm256_setzero_si256(); + __m256i mm_mul_hi = _mm256_setzero_si256(); + + for (int coefIdx = 0; coefIdx < 2; coefIdx++) + { + __m256i mmPix = _mm256_lddqu_si256((__m256i*)(src1 + coefIdx * srcStride)); + __m256i mm_hi = _mm256_mulhi_epi16(mmPix, mmCoeff[coefIdx]); + __m256i mm_lo = _mm256_mullo_epi16(mmPix, mmCoeff[coefIdx]); + mm_mul_lo = _mm256_add_epi32(mm_mul_lo, _mm256_unpacklo_epi16(mm_lo, mm_hi)); + mm_mul_hi = _mm256_add_epi32(mm_mul_hi, _mm256_unpackhi_epi16(mm_lo, mm_hi)); + } + mm_mul_lo = _mm256_sra_epi32(_mm256_add_epi32(mm_mul_lo, mmOffset), mmShift); + mm_mul_hi = _mm256_sra_epi32(_mm256_add_epi32(mm_mul_hi, mmOffset), mmShift); + __m256i mm_sum = _mm256_packs_epi32(mm_mul_lo, 
mm_mul_hi); + return (mm_sum); +} +#endif + +static inline __m128i simdInterpolateLumaHighBit2P8(int16_t const *src1, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift) +{ + __m128i mm_mul_lo = _mm_setzero_si128(); + __m128i mm_mul_hi = _mm_setzero_si128(); + + for (int coefIdx = 0; coefIdx < 2; coefIdx++) + { + __m128i mmPix = _mm_loadu_si128((__m128i*)(src1 + coefIdx * srcStride)); + __m128i mm_hi = _mm_mulhi_epi16(mmPix, mmCoeff[coefIdx]); + __m128i mm_lo = _mm_mullo_epi16(mmPix, mmCoeff[coefIdx]); + mm_mul_lo = _mm_add_epi32(mm_mul_lo, _mm_unpacklo_epi16(mm_lo, mm_hi)); + mm_mul_hi = _mm_add_epi32(mm_mul_hi, _mm_unpackhi_epi16(mm_lo, mm_hi)); + } + mm_mul_lo = _mm_sra_epi32(_mm_add_epi32(mm_mul_lo, mmOffset), mmShift); + mm_mul_hi = _mm_sra_epi32(_mm_add_epi32(mm_mul_hi, mmOffset), mmShift); + __m128i mm_sum = _mm_packs_epi32(mm_mul_lo, mm_mul_hi); + return(mm_sum); +} + +static inline __m128i simdInterpolateLumaHighBit2P4(int16_t const *src1, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift) +{ + __m128i mm_sum = _mm_setzero_si128(); + __m128i mm_zero = _mm_setzero_si128(); + for (int coefIdx = 0; coefIdx < 2; coefIdx++) + { + __m128i mmPix = _mm_loadl_epi64((__m128i*)(src1 + coefIdx * srcStride)); + __m128i mm_hi = _mm_mulhi_epi16(mmPix, mmCoeff[coefIdx]); + __m128i mm_lo = _mm_mullo_epi16(mmPix, mmCoeff[coefIdx]); + __m128i mm_mul = _mm_unpacklo_epi16(mm_lo, mm_hi); + mm_sum = _mm_add_epi32(mm_sum, mm_mul); + } + mm_sum = _mm_sra_epi32(_mm_add_epi32(mm_sum, mmOffset), mmShift); + mm_sum = _mm_packs_epi32(mm_sum, mm_zero); + return(mm_sum); +} + +template<X86_VEXT vext, bool isLast> +static void simdInterpolateN2_HIGHBIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c) +{ +#if USE_AVX2 + __m256i mm256Offset = _mm256_set1_epi32(offset); + __m256i mm256Coeff[2]; + for (int n = 0; n < 2; n++) + 
{ + mm256Coeff[n] = _mm256_set1_epi16(c[n]); + } +#endif + __m128i mmOffset = _mm_set1_epi32(offset); + __m128i mmCoeff[2]; + for (int n = 0; n < 2; n++) + mmCoeff[n] = _mm_set1_epi16(c[n]); + + __m128i mmShift = _mm_cvtsi64_si128(shift); + + CHECK(isLast, "Not Supported"); + CHECK(width % 4 != 0, "Not Supported"); + + for (int row = 0; row < height; row++) + { + int col = 0; +#if USE_AVX2 + for (; col < ((width >> 4) << 4); col += 16) + { + __m256i mmFiltered = simdInterpolateLumaHighBit2P16(src + col, cStride, mm256Coeff, mm256Offset, mmShift); + _mm256_storeu_si256((__m256i *)(dst + col), mmFiltered); + } +#endif + for (; col < ((width >> 3) << 3); col += 8) + { + __m128i mmFiltered = simdInterpolateLumaHighBit2P8(src + col, cStride, mmCoeff, mmOffset, mmShift); + _mm_storeu_si128((__m128i *)(dst + col), mmFiltered); + } + + for (; col < ((width >> 2) << 2); col += 4) + { + __m128i mmFiltered = simdInterpolateLumaHighBit2P4(src + col, cStride, mmCoeff, mmOffset, mmShift); + _mm_storel_epi64((__m128i *)(dst + col), mmFiltered); + } + src += srcStride; + dst += dstStride; + } +} + template<X86_VEXT vext, bool isLast> static void simdInterpolateN2_10BIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c) { @@ -1127,7 +1216,6 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel offset = 1 << (shift - 1); } } - if( clpRng.bd <= 10 ) { if( N == 8 && !( width & 0x07 ) ) { @@ -1179,7 +1267,14 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel { if (N == 2 && !(width & 0x03)) { + if (clpRng.bd <= 10) + { simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c); + } + else + { + simdInterpolateN2_HIGHBIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c); + } return; } } @@ -1232,6 
+1327,129 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel } } +template< X86_VEXT vext > +void xWeightedTriangleBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) +{ + Pel* dst = predDst.get(compIdx).buf; + Pel* src0 = predSrc0.get(compIdx).buf; + Pel* src1 = predSrc1.get(compIdx).buf; + int32_t strideDst = predDst.get(compIdx).stride; + int32_t strideSrc0 = predSrc0.get(compIdx).stride; + int32_t strideSrc1 = predSrc1.get(compIdx).stride; + + int32_t chromaScaleX = getComponentScaleX(compIdx, pu.chromaFormat); + int32_t chromaScaleY = getComponentScaleY(compIdx, pu.chromaFormat); + int8_t log2WidthY = floorLog2(width << chromaScaleX) - 1; + int8_t log2HeightY = floorLog2(height << chromaScaleY) - 1; + const char log2WeightBase = 3; + const ClpRng clpRng = pu.cu->slice->clpRngs().comp[compIdx]; + const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)) + log2WeightBase; + const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); + int16_t *weight = g_triangleWeights[splitDir][log2HeightY][log2WidthY]; + int16_t stepY = width << (chromaScaleX + chromaScaleY); + + const __m128i mmEight = _mm_set1_epi16(8); + const __m128i mmOffset = _mm_set1_epi32(offsetWeighted); + const __m128i mmShift = _mm_cvtsi32_si128(shiftWeighted); + const __m128i mmMin = _mm_set1_epi16(clpRng.min); + const __m128i mmMax = _mm_set1_epi16(clpRng.max); + + if (width == 2) + { + const __m128i mask = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0504, 0x0100 ); + for (int y = 0; y < height; y++) + { + __m128i s0 = _mm_cvtsi32_si128(*(uint32_t *) src0); + __m128i s1 = _mm_cvtsi32_si128(*(uint32_t *) src1); + __m128i w0 = _mm_loadl_epi64((__m128i *) (weight)); + if (chromaScaleX == 1) + { + w0 = 
_mm_shuffle_epi8(w0, mask); + } + __m128i w1 = _mm_sub_epi16(mmEight, w0); + s0 = _mm_unpacklo_epi16(s0, s1); + w0 = _mm_unpacklo_epi16(w0, w1); + s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); + s0 = _mm_sra_epi32(s0, mmShift); + s0 = _mm_packs_epi32(s0, s0); + s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); + + *(uint32_t *) dst = _mm_cvtsi128_si32(s0); + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } + else if(width == 4) + { + const __m128i mask = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0D0C, 0x0908, 0x0504, 0x0100 ); + for (int y = 0; y < height; y++) + { + __m128i s0 = _mm_loadl_epi64((__m128i *) (src0)); + __m128i s1 = _mm_loadl_epi64((__m128i *) (src1)); + __m128i w0 = _mm_loadu_si128((__m128i *) (weight)); + if (chromaScaleX == 1) + { + w0 = _mm_shuffle_epi8(w0, mask); + } + __m128i w1 = _mm_sub_epi16(mmEight, w0); + s0 = _mm_unpacklo_epi16(s0, s1); + w0 = _mm_unpacklo_epi16(w0, w1); + s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); + s0 = _mm_sra_epi32(s0, mmShift); + s0 = _mm_packs_epi32(s0, s0); + s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); + _mm_storel_epi64((__m128i *) (dst), s0); + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } + else + { + const __m128i mask1 = _mm_set_epi16( 0x0D0C, 0x0908, 0x0504, 0x0100, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080 ); + const __m128i mask2 = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0D0C, 0x0908, 0x0504, 0x0100 ); + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x += 8) + { + __m128i s0 = _mm_loadu_si128((__m128i *) (src0 + x)); + __m128i s1 = _mm_loadu_si128((__m128i *) (src1 + x)); + + __m128i w0 = _mm_loadu_si128((__m128i *) (weight + (x << chromaScaleX))); + if (chromaScaleX == 1) + { + __m128i w01 = _mm_loadu_si128((__m128i *) (weight + (x << chromaScaleX) + 8)); + w0 = 
_mm_shuffle_epi8(w0, mask1); + w01 = _mm_shuffle_epi8(w01, mask2); + w0 = _mm_alignr_epi8(w01, w0, 8); + } + __m128i w1 = _mm_sub_epi16(mmEight, w0); + + __m128i s0tmp = _mm_unpacklo_epi16(s0, s1); + __m128i w0tmp = _mm_unpacklo_epi16(w0, w1); + s0tmp = _mm_add_epi32(_mm_madd_epi16(s0tmp, w0tmp), mmOffset); + s0tmp = _mm_sra_epi32(s0tmp, mmShift); + + s0 = _mm_unpackhi_epi16(s0, s1); + w0 = _mm_unpackhi_epi16(w0, w1); + s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); + s0 = _mm_sra_epi32(s0, mmShift); + + s0 = _mm_packs_epi32(s0tmp, s0); + s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); + _mm_storeu_si128((__m128i *) (dst + x), s0); + } + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } +} + template <X86_VEXT vext> void InterpolationFilter::_initInterpolationFilterX86() { @@ -1270,6 +1488,8 @@ void InterpolationFilter::_initInterpolationFilterX86() m_filterCopy[0][1] = simdFilterCopy<vext, false, true>; m_filterCopy[1][0] = simdFilterCopy<vext, true, false>; m_filterCopy[1][1] = simdFilterCopy<vext, true, true>; + + m_weightedTriangleBlk = xWeightedTriangleBlk_SSE<vext>; } template void InterpolationFilter::_initInterpolationFilterX86<SIMDX86>(); diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h index 109b6b1a221c057ae0494581fd1544583e165d6f..b5e3288be1949c3540b222b59ef011d53d84b32a 100644 --- a/source/Lib/CommonLib/x86/RdCostX86.h +++ b/source/Lib/CommonLib/x86/RdCostX86.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -43,7 +43,10 @@ #ifdef TARGET_SIMD_X86 -template< typename Torg, typename Tcur, X86_VEXT vext > +typedef Pel Torg; +typedef Pel Tcur; + +template<X86_VEXT vext > Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam ) { if( rcDtParam.bitDepth > 10 ) @@ -67,8 +70,8 @@ Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam ) { for( int iX = 0; iX < iCols; iX+=16 ) { - __m256i Src1 = ( sizeof( Torg ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )( &pSrc1[iX] ) ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )( &pSrc1[iX] ) ) ), 0xD8 ), _mm256_setzero_si256() ) ); - __m256i Src2 = ( sizeof( Tcur ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )( &pSrc2[iX] ) ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )( &pSrc2[iX] ) ) ), 0xD8 ), _mm256_setzero_si256() ) ); + __m256i Src1 = ( _mm256_lddqu_si256( ( __m256i* )( &pSrc1[iX] ) ) ); + __m256i Src2 = ( _mm256_lddqu_si256( ( __m256i* )( &pSrc2[iX] ) ) ); __m256i Diff = _mm256_sub_epi16( Src1, Src2 ); __m256i Res = _mm256_madd_epi16( Diff, Diff ); Sum = _mm256_add_epi32( Sum, Res ); @@ -125,7 +128,7 @@ Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam ) } -template< typename Torg, typename Tcur, int iWidth, X86_VEXT vext > +template<int iWidth, X86_VEXT vext > Distortion RdCost::xGetSSE_NxN_SIMD( const DistParam &rcDtParam ) { if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight ) @@ -354,7 +357,7 @@ Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam ) if( iWidth == 4 ) { - if( iRows == 4 ) + if( iRows == 4 && iSubShift == 0 ) { __m128i vzero = _mm_setzero_si128(); __m128i vsum = vzero; @@ -453,7 +456,6 @@ Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam ) } -template< typename Torg, typename Tcur > static uint32_t xCalcHAD4x4_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, 
const int iStrideCur ) { __m128i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm_loadl_epi64( ( const __m128i* )&piOrg[0] ) ) : ( _mm_unpacklo_epi8( _mm_cvtsi32_si128( *(const int*)&piOrg[0] ), _mm_setzero_si128() ) ); @@ -538,172 +540,124 @@ static uint32_t xCalcHAD4x4_SSE( const Torg *piOrg, const Tcur *piCur, const int } //working up to 12-bit -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD8x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { - __m128i m1[8], m2[8]; + __m128i m1[8][2], m2[8][2]; for( int k = 0; k < 8; k++ ) { __m128i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm_loadu_si128( ( __m128i* )piOrg ) ) : ( _mm_unpacklo_epi8( _mm_loadl_epi64( ( const __m128i* )piOrg ), _mm_setzero_si128() ) ); __m128i r1 = ( sizeof( Tcur ) > 1 ) ? ( _mm_lddqu_si128( ( __m128i* )piCur ) ) : ( _mm_unpacklo_epi8( _mm_loadl_epi64( ( const __m128i* )piCur ), _mm_setzero_si128() ) ); // th _mm_loadu_si128( (__m128i*)piCur ) - m2[k] = _mm_sub_epi16( r0, r1 ); + m2[k][0] = _mm_sub_epi16( r0, r1 ); + m2[k][1] = _mm_cvtepi16_epi32( _mm_srli_si128( m2[k][0], 8 ) ); + m2[k][0] = _mm_cvtepi16_epi32( m2[k][0] ); piCur += iStrideCur; piOrg += iStrideOrg; } - //horizontal - m1[0] = _mm_add_epi16( m2[0], m2[4] ); - m1[1] = _mm_add_epi16( m2[1], m2[5] ); - m1[2] = _mm_add_epi16( m2[2], m2[6] ); - m1[3] = _mm_add_epi16( m2[3], m2[7] ); - m1[4] = _mm_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm_add_epi16( m1[0], m1[2] ); - m2[1] = _mm_add_epi16( m1[1], m1[3] ); - m2[2] = _mm_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm_add_epi16( m1[4], m1[6] ); - m2[5] = _mm_add_epi16( m1[5], m1[7] ); - m2[6] = _mm_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm_sub_epi16( m1[5], m1[7] ); + for( int i = 0; i < 2; i++ ) + { + //horizontal + m1[0][i] = _mm_add_epi32( 
m2[0][i], m2[4][i] ); + m1[1][i] = _mm_add_epi32( m2[1][i], m2[5][i] ); + m1[2][i] = _mm_add_epi32( m2[2][i], m2[6][i] ); + m1[3][i] = _mm_add_epi32( m2[3][i], m2[7][i] ); + m1[4][i] = _mm_sub_epi32( m2[0][i], m2[4][i] ); + m1[5][i] = _mm_sub_epi32( m2[1][i], m2[5][i] ); + m1[6][i] = _mm_sub_epi32( m2[2][i], m2[6][i] ); + m1[7][i] = _mm_sub_epi32( m2[3][i], m2[7][i] ); + + m2[0][i] = _mm_add_epi32( m1[0][i], m1[2][i] ); + m2[1][i] = _mm_add_epi32( m1[1][i], m1[3][i] ); + m2[2][i] = _mm_sub_epi32( m1[0][i], m1[2][i] ); + m2[3][i] = _mm_sub_epi32( m1[1][i], m1[3][i] ); + m2[4][i] = _mm_add_epi32( m1[4][i], m1[6][i] ); + m2[5][i] = _mm_add_epi32( m1[5][i], m1[7][i] ); + m2[6][i] = _mm_sub_epi32( m1[4][i], m1[6][i] ); + m2[7][i] = _mm_sub_epi32( m1[5][i], m1[7][i] ); + + m1[0][i] = _mm_add_epi32( m2[0][i], m2[1][i] ); + m1[1][i] = _mm_sub_epi32( m2[0][i], m2[1][i] ); + m1[2][i] = _mm_add_epi32( m2[2][i], m2[3][i] ); + m1[3][i] = _mm_sub_epi32( m2[2][i], m2[3][i] ); + m1[4][i] = _mm_add_epi32( m2[4][i], m2[5][i] ); + m1[5][i] = _mm_sub_epi32( m2[4][i], m2[5][i] ); + m1[6][i] = _mm_add_epi32( m2[6][i], m2[7][i] ); + m1[7][i] = _mm_sub_epi32( m2[6][i], m2[7][i] ); + + m2[0][i] = _mm_unpacklo_epi32( m1[0][i], m1[1][i] ); + m2[1][i] = _mm_unpacklo_epi32( m1[2][i], m1[3][i] ); + m2[2][i] = _mm_unpackhi_epi32( m1[0][i], m1[1][i] ); + m2[3][i] = _mm_unpackhi_epi32( m1[2][i], m1[3][i] ); + m2[4][i] = _mm_unpacklo_epi32( m1[4][i], m1[5][i] ); + m2[5][i] = _mm_unpacklo_epi32( m1[6][i], m1[7][i] ); + m2[6][i] = _mm_unpackhi_epi32( m1[4][i], m1[5][i] ); + m2[7][i] = _mm_unpackhi_epi32( m1[6][i], m1[7][i] ); + + m1[0][i] = _mm_unpacklo_epi64( m2[0][i], m2[1][i] ); + m1[1][i] = _mm_unpackhi_epi64( m2[0][i], m2[1][i] ); + m1[2][i] = _mm_unpacklo_epi64( m2[2][i], m2[3][i] ); + m1[3][i] = _mm_unpackhi_epi64( m2[2][i], m2[3][i] ); + m1[4][i] = _mm_unpacklo_epi64( m2[4][i], m2[5][i] ); + m1[5][i] = _mm_unpackhi_epi64( m2[4][i], m2[5][i] ); + m1[6][i] = _mm_unpacklo_epi64( m2[6][i], 
m2[7][i] ); + m1[7][i] = _mm_unpackhi_epi64( m2[6][i], m2[7][i] ); + } - m1[0] = _mm_add_epi16( m2[0], m2[1] ); - m1[1] = _mm_sub_epi16( m2[0], m2[1] ); - m1[2] = _mm_add_epi16( m2[2], m2[3] ); - m1[3] = _mm_sub_epi16( m2[2], m2[3] ); - m1[4] = _mm_add_epi16( m2[4], m2[5] ); - m1[5] = _mm_sub_epi16( m2[4], m2[5] ); - m1[6] = _mm_add_epi16( m2[6], m2[7] ); - m1[7] = _mm_sub_epi16( m2[6], m2[7] ); + __m128i n1[8][2]; + __m128i n2[8][2]; + for( int i = 0; i < 8; i++ ) { - m2[0] = _mm_unpacklo_epi16( m1[0], m1[1] ); - m2[1] = _mm_unpacklo_epi16( m1[2], m1[3] ); - m2[2] = _mm_unpacklo_epi16( m1[4], m1[5] ); - m2[3] = _mm_unpacklo_epi16( m1[6], m1[7] ); - m2[4] = _mm_unpackhi_epi16( m1[0], m1[1] ); - m2[5] = _mm_unpackhi_epi16( m1[2], m1[3] ); - m2[6] = _mm_unpackhi_epi16( m1[4], m1[5] ); - m2[7] = _mm_unpackhi_epi16( m1[6], m1[7] ); + int ii = i % 4; + int ij = i >> 2; - m1[0] = _mm_unpacklo_epi32( m2[0], m2[1] ); - m1[1] = _mm_unpackhi_epi32( m2[0], m2[1] ); - m1[2] = _mm_unpacklo_epi32( m2[2], m2[3] ); - m1[3] = _mm_unpackhi_epi32( m2[2], m2[3] ); - m1[4] = _mm_unpacklo_epi32( m2[4], m2[5] ); - m1[5] = _mm_unpackhi_epi32( m2[4], m2[5] ); - m1[6] = _mm_unpacklo_epi32( m2[6], m2[7] ); - m1[7] = _mm_unpackhi_epi32( m2[6], m2[7] ); - - m2[0] = _mm_unpacklo_epi64( m1[0], m1[2] ); - m2[1] = _mm_unpackhi_epi64( m1[0], m1[2] ); - m2[2] = _mm_unpacklo_epi64( m1[1], m1[3] ); - m2[3] = _mm_unpackhi_epi64( m1[1], m1[3] ); - m2[4] = _mm_unpacklo_epi64( m1[4], m1[6] ); - m2[5] = _mm_unpackhi_epi64( m1[4], m1[6] ); - m2[6] = _mm_unpacklo_epi64( m1[5], m1[7] ); - m2[7] = _mm_unpackhi_epi64( m1[5], m1[7] ); + n2[i][0] = m1[ii ][ij]; + n2[i][1] = m1[ii + 4][ij]; } - if( iBitDepth >= 10 /*sizeof( Torg ) > 1 || sizeof( Tcur ) > 1*/ ) + for( int i = 0; i < 2; i++ ) { - // if (g_bitDepthY >=10){ - __m128i n1[8][2]; - __m128i n2[8][2]; - - for( int i = 0; i < 8; i++ ) - { - n2[i][0] = _mm_cvtepi16_epi32( m2[i] ); - n2[i][1] = _mm_cvtepi16_epi32( _mm_shuffle_epi32( m2[i], 0xEE ) ); - } - - 
for( int i = 0; i < 2; i++ ) - { - n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] ); - n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] ); - n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] ); - n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] ); - n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] ); - n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] ); - n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] ); - n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] ); - - n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] ); - n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] ); - n2[2][i] = _mm_sub_epi32( n1[0][i], n1[2][i] ); - n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] ); - n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] ); - n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] ); - n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] ); - n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] ); - - n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) ); - n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) ); - n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) ); - n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) ); - n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) ); - n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) ); - n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) ); - n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) ); - } - for( int i = 0; i < 8; i++ ) - { - m1[i] = _mm_add_epi32( n1[i][0], n1[i][1] ); - } + n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] ); + n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] ); + n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] ); + n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] ); + n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] ); + n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] ); + n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] ); + n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] ); + + n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] ); + n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] ); + n2[2][i] = _mm_sub_epi32( 
n1[0][i], n1[2][i] ); + n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] ); + n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] ); + n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] ); + n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] ); + n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] ); + + n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) ); + n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) ); + n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) ); + n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) ); + n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) ); + n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) ); + n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) ); + n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) ); } - else + for( int i = 0; i < 8; i++ ) { - m1[0] = _mm_add_epi16( m2[0], m2[4] ); - m1[1] = _mm_add_epi16( m2[1], m2[5] ); - m1[2] = _mm_add_epi16( m2[2], m2[6] ); - m1[3] = _mm_add_epi16( m2[3], m2[7] ); - m1[4] = _mm_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm_add_epi16( m1[0], m1[2] ); - m2[1] = _mm_add_epi16( m1[1], m1[3] ); - m2[2] = _mm_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm_add_epi16( m1[4], m1[6] ); - m2[5] = _mm_add_epi16( m1[5], m1[7] ); - m2[6] = _mm_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm_sub_epi16( m1[5], m1[7] ); - - m1[0] = _mm_abs_epi16( _mm_add_epi16( m2[0], m2[1] ) ); - m1[1] = _mm_abs_epi16( _mm_sub_epi16( m2[0], m2[1] ) ); - m1[2] = _mm_abs_epi16( _mm_add_epi16( m2[2], m2[3] ) ); - m1[3] = _mm_abs_epi16( _mm_sub_epi16( m2[2], m2[3] ) ); - m1[4] = _mm_abs_epi16( _mm_add_epi16( m2[4], m2[5] ) ); - m1[5] = _mm_abs_epi16( _mm_sub_epi16( m2[4], m2[5] ) ); - m1[6] = _mm_abs_epi16( _mm_add_epi16( m2[6], m2[7] ) ); - m1[7] = _mm_abs_epi16( _mm_sub_epi16( m2[6], m2[7] ) ); - - __m128i ma1, ma2; - 
__m128i vzero = _mm_setzero_si128(); - - for( int i = 0; i < 8; i++ ) - { - ma1 = _mm_unpacklo_epi16( m1[i], vzero ); - ma2 = _mm_unpackhi_epi16( m1[i], vzero ); - m1[i] = _mm_add_epi32( ma1, ma2 ); - } + m1[i][0] = _mm_add_epi32( n1[i][0], n1[i][1] ); } + m1[0][0] = _mm_add_epi32( m1[0][0], m1[1][0] ); + m1[2][0] = _mm_add_epi32( m1[2][0], m1[3][0] ); + m1[4][0] = _mm_add_epi32( m1[4][0], m1[5][0] ); + m1[6][0] = _mm_add_epi32( m1[6][0], m1[7][0] ); - m1[0] = _mm_add_epi32( m1[0], m1[1] ); - m1[2] = _mm_add_epi32( m1[2], m1[3] ); - m1[4] = _mm_add_epi32( m1[4], m1[5] ); - m1[6] = _mm_add_epi32( m1[6], m1[7] ); - - m1[0] = _mm_add_epi32( m1[0], m1[2] ); - m1[4] = _mm_add_epi32( m1[4], m1[6] ); - __m128i iSum = _mm_add_epi32( m1[0], m1[4] ); + m1[0][0] = _mm_add_epi32( m1[0][0], m1[2][0] ); + m1[4][0] = _mm_add_epi32( m1[4][0], m1[6][0] ); + __m128i iSum = _mm_add_epi32( m1[0][0], m1[4][0] ); iSum = _mm_hadd_epi32( iSum, iSum ); iSum = _mm_hadd_epi32( iSum, iSum ); @@ -716,10 +670,9 @@ static uint32_t xCalcHAD8x8_SSE( const Torg *piOrg, const Tcur *piCur, const int //working up to 12-bit -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { - __m128i m1[16][2], m2[16][2]; + __m128i m1[16][2][2], m2[16][2][2]; __m128i iSum = _mm_setzero_si128(); for( int l = 0; l < 2; l++ ) @@ -728,345 +681,186 @@ static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const in const Tcur *piCurPtr = piCur + l*8; for( int k = 0; k < 8; k++ ) { - __m128i r0 = (sizeof( Torg ) > 1) ? (_mm_loadu_si128( (__m128i*)piOrgPtr )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piOrgPtr ), _mm_setzero_si128() )); - __m128i r1 = (sizeof( Tcur ) > 1) ? 
(_mm_lddqu_si128( (__m128i*)piCurPtr )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piCurPtr ), _mm_setzero_si128() )); // th _mm_loadu_si128( (__m128i*)piCurPtr ) - m2[k][l] = _mm_sub_epi16( r0, r1 ); + __m128i r0 = _mm_loadu_si128( (__m128i*) piOrgPtr ); + __m128i r1 = _mm_lddqu_si128( (__m128i*) piCurPtr ); + m2[k][l][0] = _mm_sub_epi16( r0, r1 ); + m2[k][l][1] = _mm_cvtepi16_epi32( _mm_srli_si128( m2[k][l][0], 8 ) ); + m2[k][l][0] = _mm_cvtepi16_epi32( m2[k][l][0] ); piCurPtr += iStrideCur; piOrgPtr += iStrideOrg; } - //vertical - m1[0][l] = _mm_add_epi16( m2[0][l], m2[4][l] ); - m1[1][l] = _mm_add_epi16( m2[1][l], m2[5][l] ); - m1[2][l] = _mm_add_epi16( m2[2][l], m2[6][l] ); - m1[3][l] = _mm_add_epi16( m2[3][l], m2[7][l] ); - m1[4][l] = _mm_sub_epi16( m2[0][l], m2[4][l] ); - m1[5][l] = _mm_sub_epi16( m2[1][l], m2[5][l] ); - m1[6][l] = _mm_sub_epi16( m2[2][l], m2[6][l] ); - m1[7][l] = _mm_sub_epi16( m2[3][l], m2[7][l] ); - - m2[0][l] = _mm_add_epi16( m1[0][l], m1[2][l] ); - m2[1][l] = _mm_add_epi16( m1[1][l], m1[3][l] ); - m2[2][l] = _mm_sub_epi16( m1[0][l], m1[2][l] ); - m2[3][l] = _mm_sub_epi16( m1[1][l], m1[3][l] ); - m2[4][l] = _mm_add_epi16( m1[4][l], m1[6][l] ); - m2[5][l] = _mm_add_epi16( m1[5][l], m1[7][l] ); - m2[6][l] = _mm_sub_epi16( m1[4][l], m1[6][l] ); - m2[7][l] = _mm_sub_epi16( m1[5][l], m1[7][l] ); - - m1[0][l] = _mm_add_epi16( m2[0][l], m2[1][l] ); - m1[1][l] = _mm_sub_epi16( m2[0][l], m2[1][l] ); - m1[2][l] = _mm_add_epi16( m2[2][l], m2[3][l] ); - m1[3][l] = _mm_sub_epi16( m2[2][l], m2[3][l] ); - m1[4][l] = _mm_add_epi16( m2[4][l], m2[5][l] ); - m1[5][l] = _mm_sub_epi16( m2[4][l], m2[5][l] ); - m1[6][l] = _mm_add_epi16( m2[6][l], m2[7][l] ); - m1[7][l] = _mm_sub_epi16( m2[6][l], m2[7][l] ); + for( int i = 0; i < 2; i++ ) + { + //vertical + m1[0][l][i] = _mm_add_epi32( m2[0][l][i], m2[4][l][i] ); + m1[1][l][i] = _mm_add_epi32( m2[1][l][i], m2[5][l][i] ); + m1[2][l][i] = _mm_add_epi32( m2[2][l][i], m2[6][l][i] ); + m1[3][l][i] = 
_mm_add_epi32( m2[3][l][i], m2[7][l][i] ); + m1[4][l][i] = _mm_sub_epi32( m2[0][l][i], m2[4][l][i] ); + m1[5][l][i] = _mm_sub_epi32( m2[1][l][i], m2[5][l][i] ); + m1[6][l][i] = _mm_sub_epi32( m2[2][l][i], m2[6][l][i] ); + m1[7][l][i] = _mm_sub_epi32( m2[3][l][i], m2[7][l][i] ); + + m2[0][l][i] = _mm_add_epi32( m1[0][l][i], m1[2][l][i] ); + m2[1][l][i] = _mm_add_epi32( m1[1][l][i], m1[3][l][i] ); + m2[2][l][i] = _mm_sub_epi32( m1[0][l][i], m1[2][l][i] ); + m2[3][l][i] = _mm_sub_epi32( m1[1][l][i], m1[3][l][i] ); + m2[4][l][i] = _mm_add_epi32( m1[4][l][i], m1[6][l][i] ); + m2[5][l][i] = _mm_add_epi32( m1[5][l][i], m1[7][l][i] ); + m2[6][l][i] = _mm_sub_epi32( m1[4][l][i], m1[6][l][i] ); + m2[7][l][i] = _mm_sub_epi32( m1[5][l][i], m1[7][l][i] ); + + m1[0][l][i] = _mm_add_epi32( m2[0][l][i], m2[1][l][i] ); + m1[1][l][i] = _mm_sub_epi32( m2[0][l][i], m2[1][l][i] ); + m1[2][l][i] = _mm_add_epi32( m2[2][l][i], m2[3][l][i] ); + m1[3][l][i] = _mm_sub_epi32( m2[2][l][i], m2[3][l][i] ); + m1[4][l][i] = _mm_add_epi32( m2[4][l][i], m2[5][l][i] ); + m1[5][l][i] = _mm_sub_epi32( m2[4][l][i], m2[5][l][i] ); + m1[6][l][i] = _mm_add_epi32( m2[6][l][i], m2[7][l][i] ); + m1[7][l][i] = _mm_sub_epi32( m2[6][l][i], m2[7][l][i] ); + } } - __m128i vzero = _mm_setzero_si128(); - // 4 x 8x4 blocks // 0 1 // 2 3 - if( iBitDepth >= 10 ) - { - // transpose and do horizontal in two steps - for( int l = 0; l < 2; l++ ) - { - int off = l * 4; - - // transpose 8x4 -> 4x8 - // 0 1 -> 0 and -> 2 - // 1 2 3 3 - - // transpose 8x4 -> 4x8, block 0(2) - m2[0][0] = _mm_unpacklo_epi16( m1[0 + off][0], m1[1 + off][0] ); - m2[1][0] = _mm_unpacklo_epi16( m1[2 + off][0], m1[3 + off][0] ); - m2[2][0] = _mm_unpackhi_epi16( m1[0 + off][0], m1[1 + off][0] ); - m2[3][0] = _mm_unpackhi_epi16( m1[2 + off][0], m1[3 + off][0] ); - - m1[0][0] = _mm_unpacklo_epi32( m2[0][0], m2[1][0] ); - m1[1][0] = _mm_unpackhi_epi32( m2[0][0], m2[1][0] ); - m1[2][0] = _mm_unpacklo_epi32( m2[2][0], m2[3][0] ); - m1[3][0] = 
_mm_unpackhi_epi32( m2[2][0], m2[3][0] ); - - m2[0][0] = _mm_unpacklo_epi64( m1[0][0], vzero ); - m2[1][0] = _mm_unpackhi_epi64( m1[0][0], vzero ); - m2[2][0] = _mm_unpacklo_epi64( m1[1][0], vzero ); - m2[3][0] = _mm_unpackhi_epi64( m1[1][0], vzero ); - m2[4][0] = _mm_unpacklo_epi64( m1[2][0], vzero ); - m2[5][0] = _mm_unpackhi_epi64( m1[2][0], vzero ); - m2[6][0] = _mm_unpacklo_epi64( m1[3][0], vzero ); - m2[7][0] = _mm_unpackhi_epi64( m1[3][0], vzero ); - - // transpose 8x4 -> 4x8, block 1(3) - m2[0 + 8][0] = _mm_unpacklo_epi16( m1[0 + off][1], m1[1 + off][1] ); - m2[1 + 8][0] = _mm_unpacklo_epi16( m1[2 + off][1], m1[3 + off][1] ); - m2[2 + 8][0] = _mm_unpackhi_epi16( m1[0 + off][1], m1[1 + off][1] ); - m2[3 + 8][0] = _mm_unpackhi_epi16( m1[2 + off][1], m1[3 + off][1] ); - - m1[0 + 8][0] = _mm_unpacklo_epi32( m2[0 + 8][0], m2[1 + 8][0] ); - m1[1 + 8][0] = _mm_unpackhi_epi32( m2[0 + 8][0], m2[1 + 8][0] ); - m1[2 + 8][0] = _mm_unpacklo_epi32( m2[2 + 8][0], m2[3 + 8][0] ); - m1[3 + 8][0] = _mm_unpackhi_epi32( m2[2 + 8][0], m2[3 + 8][0] ); - - m2[0 + 8][0] = _mm_unpacklo_epi64( m1[0 + 8][0], vzero ); - m2[1 + 8][0] = _mm_unpackhi_epi64( m1[0 + 8][0], vzero ); - m2[2 + 8][0] = _mm_unpacklo_epi64( m1[1 + 8][0], vzero ); - m2[3 + 8][0] = _mm_unpackhi_epi64( m1[1 + 8][0], vzero ); - m2[4 + 8][0] = _mm_unpacklo_epi64( m1[2 + 8][0], vzero ); - m2[5 + 8][0] = _mm_unpackhi_epi64( m1[2 + 8][0], vzero ); - m2[6 + 8][0] = _mm_unpacklo_epi64( m1[3 + 8][0], vzero ); - m2[7 + 8][0] = _mm_unpackhi_epi64( m1[3 + 8][0], vzero ); - - // horizontal - //if( iBitDepth >= 10 ) - { - __m128i n1[16]; - __m128i n2[16]; - - for( int i = 0; i < 16; i++ ) - { - n1[i] = _mm_cvtepi16_epi32( m2[i][0] ); - } - - n2[0] = _mm_add_epi32( n1[0], n1[8] ); - n2[1] = _mm_add_epi32( n1[1], n1[9] ); - n2[2] = _mm_add_epi32( n1[2], n1[10] ); - n2[3] = _mm_add_epi32( n1[3], n1[11] ); - n2[4] = _mm_add_epi32( n1[4], n1[12] ); - n2[5] = _mm_add_epi32( n1[5], n1[13] ); - n2[6] = _mm_add_epi32( n1[6], n1[14] ); - 
n2[7] = _mm_add_epi32( n1[7], n1[15] ); - n2[8] = _mm_sub_epi32( n1[0], n1[8] ); - n2[9] = _mm_sub_epi32( n1[1], n1[9] ); - n2[10] = _mm_sub_epi32( n1[2], n1[10] ); - n2[11] = _mm_sub_epi32( n1[3], n1[11] ); - n2[12] = _mm_sub_epi32( n1[4], n1[12] ); - n2[13] = _mm_sub_epi32( n1[5], n1[13] ); - n2[14] = _mm_sub_epi32( n1[6], n1[14] ); - n2[15] = _mm_sub_epi32( n1[7], n1[15] ); - - n1[0] = _mm_add_epi32( n2[0], n2[4] ); - n1[1] = _mm_add_epi32( n2[1], n2[5] ); - n1[2] = _mm_add_epi32( n2[2], n2[6] ); - n1[3] = _mm_add_epi32( n2[3], n2[7] ); - n1[4] = _mm_sub_epi32( n2[0], n2[4] ); - n1[5] = _mm_sub_epi32( n2[1], n2[5] ); - n1[6] = _mm_sub_epi32( n2[2], n2[6] ); - n1[7] = _mm_sub_epi32( n2[3], n2[7] ); - n1[8] = _mm_add_epi32( n2[8], n2[12] ); - n1[9] = _mm_add_epi32( n2[9], n2[13] ); - n1[10] = _mm_add_epi32( n2[10], n2[14] ); - n1[11] = _mm_add_epi32( n2[11], n2[15] ); - n1[12] = _mm_sub_epi32( n2[8], n2[12] ); - n1[13] = _mm_sub_epi32( n2[9], n2[13] ); - n1[14] = _mm_sub_epi32( n2[10], n2[14] ); - n1[15] = _mm_sub_epi32( n2[11], n2[15] ); - - n2[0] = _mm_add_epi32( n1[0], n1[2] ); - n2[1] = _mm_add_epi32( n1[1], n1[3] ); - n2[2] = _mm_sub_epi32( n1[0], n1[2] ); - n2[3] = _mm_sub_epi32( n1[1], n1[3] ); - n2[4] = _mm_add_epi32( n1[4], n1[6] ); - n2[5] = _mm_add_epi32( n1[5], n1[7] ); - n2[6] = _mm_sub_epi32( n1[4], n1[6] ); - n2[7] = _mm_sub_epi32( n1[5], n1[7] ); - n2[8] = _mm_add_epi32( n1[8], n1[10] ); - n2[9] = _mm_add_epi32( n1[9], n1[11] ); - n2[10] = _mm_sub_epi32( n1[8], n1[10] ); - n2[11] = _mm_sub_epi32( n1[9], n1[11] ); - n2[12] = _mm_add_epi32( n1[12], n1[14] ); - n2[13] = _mm_add_epi32( n1[13], n1[15] ); - n2[14] = _mm_sub_epi32( n1[12], n1[14] ); - n2[15] = _mm_sub_epi32( n1[13], n1[15] ); - - n1[0] = _mm_abs_epi32( _mm_add_epi32( n2[0], n2[1] ) ); - n1[1] = _mm_abs_epi32( _mm_sub_epi32( n2[0], n2[1] ) ); - n1[2] = _mm_abs_epi32( _mm_add_epi32( n2[2], n2[3] ) ); - n1[3] = _mm_abs_epi32( _mm_sub_epi32( n2[2], n2[3] ) ); - n1[4] = _mm_abs_epi32( 
_mm_add_epi32( n2[4], n2[5] ) ); - n1[5] = _mm_abs_epi32( _mm_sub_epi32( n2[4], n2[5] ) ); - n1[6] = _mm_abs_epi32( _mm_add_epi32( n2[6], n2[7] ) ); - n1[7] = _mm_abs_epi32( _mm_sub_epi32( n2[6], n2[7] ) ); - n1[8] = _mm_abs_epi32( _mm_add_epi32( n2[8], n2[9] ) ); - n1[9] = _mm_abs_epi32( _mm_sub_epi32( n2[8], n2[9] ) ); - n1[10] = _mm_abs_epi32( _mm_add_epi32( n2[10], n2[11] ) ); - n1[11] = _mm_abs_epi32( _mm_sub_epi32( n2[10], n2[11] ) ); - n1[12] = _mm_abs_epi32( _mm_add_epi32( n2[12], n2[13] ) ); - n1[13] = _mm_abs_epi32( _mm_sub_epi32( n2[12], n2[13] ) ); - n1[14] = _mm_abs_epi32( _mm_add_epi32( n2[14], n2[15] ) ); - n1[15] = _mm_abs_epi32( _mm_sub_epi32( n2[14], n2[15] ) ); - - // sum up - n1[0] = _mm_add_epi32( n1[0], n1[1] ); - n1[2] = _mm_add_epi32( n1[2], n1[3] ); - n1[4] = _mm_add_epi32( n1[4], n1[5] ); - n1[6] = _mm_add_epi32( n1[6], n1[7] ); - n1[8] = _mm_add_epi32( n1[8], n1[9] ); - n1[10] = _mm_add_epi32( n1[10], n1[11] ); - n1[12] = _mm_add_epi32( n1[12], n1[13] ); - n1[14] = _mm_add_epi32( n1[14], n1[15] ); - - n1[0] = _mm_add_epi32( n1[0], n1[2] ); - n1[4] = _mm_add_epi32( n1[4], n1[6] ); - n1[8] = _mm_add_epi32( n1[8], n1[10] ); - n1[12] = _mm_add_epi32( n1[12], n1[14] ); - - n1[0] = _mm_add_epi32( n1[0], n1[4] ); - n1[8] = _mm_add_epi32( n1[8], n1[12] ); - - n1[0] = _mm_add_epi32( n1[0], n1[8] ); - iSum = _mm_add_epi32( iSum, n1[0] ); - } - } - } - else + // transpose and do horizontal in two steps + for( int l = 0; l < 2; l++ ) { - const int off = 4; - // transpose 8x8 - // block 0 - m2[0][0] = _mm_unpacklo_epi16( m1[0][0], m1[1][0] ); - m2[1][0] = _mm_unpacklo_epi16( m1[2][0], m1[3][0] ); - m2[2][0] = _mm_unpackhi_epi16( m1[0][0], m1[1][0] ); - m2[3][0] = _mm_unpackhi_epi16( m1[2][0], m1[3][0] ); - - m1[0][0] = _mm_unpacklo_epi32( m2[0][0], m2[1][0] ); - m1[1][0] = _mm_unpackhi_epi32( m2[0][0], m2[1][0] ); - m1[2][0] = _mm_unpacklo_epi32( m2[2][0], m2[3][0] ); - m1[3][0] = _mm_unpackhi_epi32( m2[2][0], m2[3][0] ); - - // block 2 - m2[0 + 
off][0] = _mm_unpacklo_epi16( m1[0 + off][0], m1[1 + off][0] ); - m2[1 + off][0] = _mm_unpacklo_epi16( m1[2 + off][0], m1[3 + off][0] ); - m2[2 + off][0] = _mm_unpackhi_epi16( m1[0 + off][0], m1[1 + off][0] ); - m2[3 + off][0] = _mm_unpackhi_epi16( m1[2 + off][0], m1[3 + off][0] ); - - m1[0 + off][0] = _mm_unpacklo_epi32( m2[0 + off][0], m2[1 + off][0] ); - m1[1 + off][0] = _mm_unpackhi_epi32( m2[0 + off][0], m2[1 + off][0] ); - m1[2 + off][0] = _mm_unpacklo_epi32( m2[2 + off][0], m2[3 + off][0] ); - m1[3 + off][0] = _mm_unpackhi_epi32( m2[2 + off][0], m2[3 + off][0] ); - - m2[0][0] = _mm_unpacklo_epi64( m1[0][0], m1[0 + off][0] ); - m2[1][0] = _mm_unpackhi_epi64( m1[0][0], m1[0 + off][0] ); - m2[2][0] = _mm_unpacklo_epi64( m1[1][0], m1[1 + off][0] ); - m2[3][0] = _mm_unpackhi_epi64( m1[1][0], m1[1 + off][0] ); - m2[4][0] = _mm_unpacklo_epi64( m1[2][0], m1[2 + off][0] ); - m2[5][0] = _mm_unpackhi_epi64( m1[2][0], m1[2 + off][0] ); - m2[6][0] = _mm_unpacklo_epi64( m1[3][0], m1[3 + off][0] ); - m2[7][0] = _mm_unpackhi_epi64( m1[3][0], m1[3 + off][0] ); - - // transpose 8x8 - // block 1 - m2[0][1] = _mm_unpacklo_epi16( m1[0][1], m1[1][1] ); - m2[1][1] = _mm_unpacklo_epi16( m1[2][1], m1[3][1] ); - m2[2][1] = _mm_unpackhi_epi16( m1[0][1], m1[1][1] ); - m2[3][1] = _mm_unpackhi_epi16( m1[2][1], m1[3][1] ); - - m1[0][1] = _mm_unpacklo_epi32( m2[0][1], m2[1][1] ); - m1[1][1] = _mm_unpackhi_epi32( m2[0][1], m2[1][1] ); - m1[2][1] = _mm_unpacklo_epi32( m2[2][1], m2[3][1] ); - m1[3][1] = _mm_unpackhi_epi32( m2[2][1], m2[3][1] ); - - // block 3 - m2[0 + off][1] = _mm_unpacklo_epi16( m1[0 + off][1], m1[1 + off][1] ); - m2[1 + off][1] = _mm_unpacklo_epi16( m1[2 + off][1], m1[3 + off][1] ); - m2[2 + off][1] = _mm_unpackhi_epi16( m1[0 + off][1], m1[1 + off][1] ); - m2[3 + off][1] = _mm_unpackhi_epi16( m1[2 + off][1], m1[3 + off][1] ); - - m1[0 + off][1] = _mm_unpacklo_epi32( m2[0 + off][1], m2[1 + off][1] ); - m1[1 + off][1] = _mm_unpackhi_epi32( m2[0 + off][1], m2[1 + off][1] ); - 
m1[2 + off][1] = _mm_unpacklo_epi32( m2[2 + off][1], m2[3 + off][1] ); - m1[3 + off][1] = _mm_unpackhi_epi32( m2[2 + off][1], m2[3 + off][1] ); - - m2[0 + 8][0] = _mm_unpacklo_epi64( m1[0][1], m1[0 + off][1] ); - m2[1 + 8][0] = _mm_unpackhi_epi64( m1[0][1], m1[0 + off][1] ); - m2[2 + 8][0] = _mm_unpacklo_epi64( m1[1][1], m1[1 + off][1] ); - m2[3 + 8][0] = _mm_unpackhi_epi64( m1[1][1], m1[1 + off][1] ); - m2[4 + 8][0] = _mm_unpacklo_epi64( m1[2][1], m1[2 + off][1] ); - m2[5 + 8][0] = _mm_unpackhi_epi64( m1[2][1], m1[2 + off][1] ); - m2[6 + 8][0] = _mm_unpacklo_epi64( m1[3][1], m1[3 + off][1] ); - m2[7 + 8][0] = _mm_unpackhi_epi64( m1[3][1], m1[3 + off][1] ); - - // horizontal - m1[0][0] = _mm_add_epi16( m2[0][0], m2[8][0] ); - m1[1][0] = _mm_add_epi16( m2[1][0], m2[9][0] ); - m1[2][0] = _mm_add_epi16( m2[2][0], m2[10][0] ); - m1[3][0] = _mm_add_epi16( m2[3][0], m2[11][0] ); - m1[4][0] = _mm_add_epi16( m2[4][0], m2[12][0] ); - m1[5][0] = _mm_add_epi16( m2[5][0], m2[13][0] ); - m1[6][0] = _mm_add_epi16( m2[6][0], m2[14][0] ); - m1[7][0] = _mm_add_epi16( m2[7][0], m2[15][0] ); - m1[8][0] = _mm_sub_epi16( m2[0][0], m2[8][0] ); - m1[9][0] = _mm_sub_epi16( m2[1][0], m2[9][0] ); - m1[10][0] = _mm_sub_epi16( m2[2][0], m2[10][0] ); - m1[11][0] = _mm_sub_epi16( m2[3][0], m2[11][0] ); - m1[12][0] = _mm_sub_epi16( m2[4][0], m2[12][0] ); - m1[13][0] = _mm_sub_epi16( m2[5][0], m2[13][0] ); - m1[14][0] = _mm_sub_epi16( m2[6][0], m2[14][0] ); - m1[15][0] = _mm_sub_epi16( m2[7][0], m2[15][0] ); - - m2[0][0] = _mm_add_epi16( m1[0][0], m1[4][0] ); - m2[1][0] = _mm_add_epi16( m1[1][0], m1[5][0] ); - m2[2][0] = _mm_add_epi16( m1[2][0], m1[6][0] ); - m2[3][0] = _mm_add_epi16( m1[3][0], m1[7][0] ); - m2[4][0] = _mm_sub_epi16( m1[0][0], m1[4][0] ); - m2[5][0] = _mm_sub_epi16( m1[1][0], m1[5][0] ); - m2[6][0] = _mm_sub_epi16( m1[2][0], m1[6][0] ); - m2[7][0] = _mm_sub_epi16( m1[3][0], m1[7][0] ); - m2[8][0] = _mm_add_epi16( m1[8][0], m1[12][0] ); - m2[9][0] = _mm_add_epi16( m1[9][0], 
m1[13][0] ); - m2[10][0] = _mm_add_epi16( m1[10][0], m1[14][0] ); - m2[11][0] = _mm_add_epi16( m1[11][0], m1[15][0] ); - m2[12][0] = _mm_sub_epi16( m1[8][0], m1[12][0] ); - m2[13][0] = _mm_sub_epi16( m1[9][0], m1[13][0] ); - m2[14][0] = _mm_sub_epi16( m1[10][0], m1[14][0] ); - m2[15][0] = _mm_sub_epi16( m1[11][0], m1[15][0] ); - - m1[0][0] = _mm_add_epi16( m2[0][0], m2[2][0] ); - m1[1][0] = _mm_add_epi16( m2[1][0], m2[3][0] ); - m1[2][0] = _mm_sub_epi16( m2[0][0], m2[2][0] ); - m1[3][0] = _mm_sub_epi16( m2[1][0], m2[3][0] ); - m1[4][0] = _mm_add_epi16( m2[4][0], m2[6][0] ); - m1[5][0] = _mm_add_epi16( m2[5][0], m2[7][0] ); - m1[6][0] = _mm_sub_epi16( m2[4][0], m2[6][0] ); - m1[7][0] = _mm_sub_epi16( m2[5][0], m2[7][0] ); - m1[8][0] = _mm_add_epi16( m2[8][0], m2[10][0] ); - m1[9][0] = _mm_add_epi16( m2[9][0], m2[11][0] ); - m1[10][0] = _mm_sub_epi16( m2[8][0], m2[10][0] ); - m1[11][0] = _mm_sub_epi16( m2[9][0], m2[11][0] ); - m1[12][0] = _mm_add_epi16( m2[12][0], m2[14][0] ); - m1[13][0] = _mm_add_epi16( m2[13][0], m2[15][0] ); - m1[14][0] = _mm_sub_epi16( m2[12][0], m2[14][0] ); - m1[15][0] = _mm_sub_epi16( m2[13][0], m2[15][0] ); - - m2[0][0] = _mm_abs_epi16( _mm_add_epi16( m1[0][0], m1[1][0] ) ); - m2[1][0] = _mm_abs_epi16( _mm_sub_epi16( m1[0][0], m1[1][0] ) ); - m2[2][0] = _mm_abs_epi16( _mm_add_epi16( m1[2][0], m1[3][0] ) ); - m2[3][0] = _mm_abs_epi16( _mm_sub_epi16( m1[2][0], m1[3][0] ) ); - m2[4][0] = _mm_abs_epi16( _mm_add_epi16( m1[4][0], m1[5][0] ) ); - m2[5][0] = _mm_abs_epi16( _mm_sub_epi16( m1[4][0], m1[5][0] ) ); - m2[6][0] = _mm_abs_epi16( _mm_add_epi16( m1[6][0], m1[7][0] ) ); - m2[7][0] = _mm_abs_epi16( _mm_sub_epi16( m1[6][0], m1[7][0] ) ); - m2[8][0] = _mm_abs_epi16( _mm_add_epi16( m1[8][0], m1[9][0] ) ); - m2[9][0] = _mm_abs_epi16( _mm_sub_epi16( m1[8][0], m1[9][0] ) ); - m2[10][0] = _mm_abs_epi16( _mm_add_epi16( m1[10][0], m1[11][0] ) ); - m2[11][0] = _mm_abs_epi16( _mm_sub_epi16( m1[10][0], m1[11][0] ) ); - m2[12][0] = _mm_abs_epi16( 
_mm_add_epi16( m1[12][0], m1[13][0] ) ); - m2[13][0] = _mm_abs_epi16( _mm_sub_epi16( m1[12][0], m1[13][0] ) ); - m2[14][0] = _mm_abs_epi16( _mm_add_epi16( m1[14][0], m1[15][0] ) ); - m2[15][0] = _mm_abs_epi16( _mm_sub_epi16( m1[14][0], m1[15][0] ) ); - - __m128i ma1, ma2; + int off = l * 4; + + __m128i n1[16]; + __m128i n2[16]; + + m2[0][0][0] = _mm_unpacklo_epi32( m1[0 + off][0][0], m1[1 + off][0][0] ); + m2[1][0][0] = _mm_unpacklo_epi32( m1[2 + off][0][0], m1[3 + off][0][0] ); + m2[2][0][0] = _mm_unpackhi_epi32( m1[0 + off][0][0], m1[1 + off][0][0] ); + m2[3][0][0] = _mm_unpackhi_epi32( m1[2 + off][0][0], m1[3 + off][0][0] ); + + m2[0][0][1] = _mm_unpacklo_epi32( m1[0 + off][0][1], m1[1 + off][0][1] ); + m2[1][0][1] = _mm_unpacklo_epi32( m1[2 + off][0][1], m1[3 + off][0][1] ); + m2[2][0][1] = _mm_unpackhi_epi32( m1[0 + off][0][1], m1[1 + off][0][1] ); + m2[3][0][1] = _mm_unpackhi_epi32( m1[2 + off][0][1], m1[3 + off][0][1] ); + + n1[0] = _mm_unpacklo_epi64( m2[0][0][0], m2[1][0][0] ); + n1[1] = _mm_unpackhi_epi64( m2[0][0][0], m2[1][0][0] ); + n1[2] = _mm_unpacklo_epi64( m2[2][0][0], m2[3][0][0] ); + n1[3] = _mm_unpackhi_epi64( m2[2][0][0], m2[3][0][0] ); + n1[4] = _mm_unpacklo_epi64( m2[0][0][1], m2[1][0][1] ); + n1[5] = _mm_unpackhi_epi64( m2[0][0][1], m2[1][0][1] ); + n1[6] = _mm_unpacklo_epi64( m2[2][0][1], m2[3][0][1] ); + n1[7] = _mm_unpackhi_epi64( m2[2][0][1], m2[3][0][1] ); + + // transpose 8x4 -> 4x8, block 1(3) + m2[8+0][0][0] = _mm_unpacklo_epi32( m1[0 + off][1][0], m1[1 + off][1][0] ); + m2[8+1][0][0] = _mm_unpacklo_epi32( m1[2 + off][1][0], m1[3 + off][1][0] ); + m2[8+2][0][0] = _mm_unpackhi_epi32( m1[0 + off][1][0], m1[1 + off][1][0] ); + m2[8+3][0][0] = _mm_unpackhi_epi32( m1[2 + off][1][0], m1[3 + off][1][0] ); + + m2[8+0][0][1] = _mm_unpacklo_epi32( m1[0 + off][1][1], m1[1 + off][1][1] ); + m2[8+1][0][1] = _mm_unpacklo_epi32( m1[2 + off][1][1], m1[3 + off][1][1] ); + m2[8+2][0][1] = _mm_unpackhi_epi32( m1[0 + off][1][1], m1[1 + off][1][1] ); + 
m2[8+3][0][1] = _mm_unpackhi_epi32( m1[2 + off][1][1], m1[3 + off][1][1] ); + + n1[8+0] = _mm_unpacklo_epi64( m2[8+0][0][0], m2[8+1][0][0] ); + n1[8+1] = _mm_unpackhi_epi64( m2[8+0][0][0], m2[8+1][0][0] ); + n1[8+2] = _mm_unpacklo_epi64( m2[8+2][0][0], m2[8+3][0][0] ); + n1[8+3] = _mm_unpackhi_epi64( m2[8+2][0][0], m2[8+3][0][0] ); + n1[8+4] = _mm_unpacklo_epi64( m2[8+0][0][1], m2[8+1][0][1] ); + n1[8+5] = _mm_unpackhi_epi64( m2[8+0][0][1], m2[8+1][0][1] ); + n1[8+6] = _mm_unpacklo_epi64( m2[8+2][0][1], m2[8+3][0][1] ); + n1[8+7] = _mm_unpackhi_epi64( m2[8+2][0][1], m2[8+3][0][1] ); + + n2[0] = _mm_add_epi32( n1[0], n1[8] ); + n2[1] = _mm_add_epi32( n1[1], n1[9] ); + n2[2] = _mm_add_epi32( n1[2], n1[10] ); + n2[3] = _mm_add_epi32( n1[3], n1[11] ); + n2[4] = _mm_add_epi32( n1[4], n1[12] ); + n2[5] = _mm_add_epi32( n1[5], n1[13] ); + n2[6] = _mm_add_epi32( n1[6], n1[14] ); + n2[7] = _mm_add_epi32( n1[7], n1[15] ); + n2[8] = _mm_sub_epi32( n1[0], n1[8] ); + n2[9] = _mm_sub_epi32( n1[1], n1[9] ); + n2[10] = _mm_sub_epi32( n1[2], n1[10] ); + n2[11] = _mm_sub_epi32( n1[3], n1[11] ); + n2[12] = _mm_sub_epi32( n1[4], n1[12] ); + n2[13] = _mm_sub_epi32( n1[5], n1[13] ); + n2[14] = _mm_sub_epi32( n1[6], n1[14] ); + n2[15] = _mm_sub_epi32( n1[7], n1[15] ); + + n1[0] = _mm_add_epi32( n2[0], n2[4] ); + n1[1] = _mm_add_epi32( n2[1], n2[5] ); + n1[2] = _mm_add_epi32( n2[2], n2[6] ); + n1[3] = _mm_add_epi32( n2[3], n2[7] ); + n1[4] = _mm_sub_epi32( n2[0], n2[4] ); + n1[5] = _mm_sub_epi32( n2[1], n2[5] ); + n1[6] = _mm_sub_epi32( n2[2], n2[6] ); + n1[7] = _mm_sub_epi32( n2[3], n2[7] ); + n1[8] = _mm_add_epi32( n2[8], n2[12] ); + n1[9] = _mm_add_epi32( n2[9], n2[13] ); + n1[10] = _mm_add_epi32( n2[10], n2[14] ); + n1[11] = _mm_add_epi32( n2[11], n2[15] ); + n1[12] = _mm_sub_epi32( n2[8], n2[12] ); + n1[13] = _mm_sub_epi32( n2[9], n2[13] ); + n1[14] = _mm_sub_epi32( n2[10], n2[14] ); + n1[15] = _mm_sub_epi32( n2[11], n2[15] ); + + n2[0] = _mm_add_epi32( n1[0], n1[2] ); + n2[1] = 
_mm_add_epi32( n1[1], n1[3] ); + n2[2] = _mm_sub_epi32( n1[0], n1[2] ); + n2[3] = _mm_sub_epi32( n1[1], n1[3] ); + n2[4] = _mm_add_epi32( n1[4], n1[6] ); + n2[5] = _mm_add_epi32( n1[5], n1[7] ); + n2[6] = _mm_sub_epi32( n1[4], n1[6] ); + n2[7] = _mm_sub_epi32( n1[5], n1[7] ); + n2[8] = _mm_add_epi32( n1[8], n1[10] ); + n2[9] = _mm_add_epi32( n1[9], n1[11] ); + n2[10] = _mm_sub_epi32( n1[8], n1[10] ); + n2[11] = _mm_sub_epi32( n1[9], n1[11] ); + n2[12] = _mm_add_epi32( n1[12], n1[14] ); + n2[13] = _mm_add_epi32( n1[13], n1[15] ); + n2[14] = _mm_sub_epi32( n1[12], n1[14] ); + n2[15] = _mm_sub_epi32( n1[13], n1[15] ); + + n1[0] = _mm_abs_epi32( _mm_add_epi32( n2[0], n2[1] ) ); + n1[1] = _mm_abs_epi32( _mm_sub_epi32( n2[0], n2[1] ) ); + n1[2] = _mm_abs_epi32( _mm_add_epi32( n2[2], n2[3] ) ); + n1[3] = _mm_abs_epi32( _mm_sub_epi32( n2[2], n2[3] ) ); + n1[4] = _mm_abs_epi32( _mm_add_epi32( n2[4], n2[5] ) ); + n1[5] = _mm_abs_epi32( _mm_sub_epi32( n2[4], n2[5] ) ); + n1[6] = _mm_abs_epi32( _mm_add_epi32( n2[6], n2[7] ) ); + n1[7] = _mm_abs_epi32( _mm_sub_epi32( n2[6], n2[7] ) ); + n1[8] = _mm_abs_epi32( _mm_add_epi32( n2[8], n2[9] ) ); + n1[9] = _mm_abs_epi32( _mm_sub_epi32( n2[8], n2[9] ) ); + n1[10] = _mm_abs_epi32( _mm_add_epi32( n2[10], n2[11] ) ); + n1[11] = _mm_abs_epi32( _mm_sub_epi32( n2[10], n2[11] ) ); + n1[12] = _mm_abs_epi32( _mm_add_epi32( n2[12], n2[13] ) ); + n1[13] = _mm_abs_epi32( _mm_sub_epi32( n2[12], n2[13] ) ); + n1[14] = _mm_abs_epi32( _mm_add_epi32( n2[14], n2[15] ) ); + n1[15] = _mm_abs_epi32( _mm_sub_epi32( n2[14], n2[15] ) ); - for( int i = 0; i < 16; i++ ) - { - ma1 = _mm_unpacklo_epi16( m2[i][0], vzero ); - ma2 = _mm_unpackhi_epi16( m2[i][0], vzero ); - iSum = _mm_add_epi32( iSum, _mm_add_epi32( ma1, ma2 ) ); - } + // sum up + n1[0] = _mm_add_epi32( n1[0], n1[1] ); + n1[2] = _mm_add_epi32( n1[2], n1[3] ); + n1[4] = _mm_add_epi32( n1[4], n1[5] ); + n1[6] = _mm_add_epi32( n1[6], n1[7] ); + n1[8] = _mm_add_epi32( n1[8], n1[9] ); + n1[10] = 
_mm_add_epi32( n1[10], n1[11] ); + n1[12] = _mm_add_epi32( n1[12], n1[13] ); + n1[14] = _mm_add_epi32( n1[14], n1[15] ); + + n1[0] = _mm_add_epi32( n1[0], n1[2] ); + n1[4] = _mm_add_epi32( n1[4], n1[6] ); + n1[8] = _mm_add_epi32( n1[8], n1[10] ); + n1[12] = _mm_add_epi32( n1[12], n1[14] ); + + n1[0] = _mm_add_epi32( n1[0], n1[4] ); + n1[8] = _mm_add_epi32( n1[8], n1[12] ); + + n1[0] = _mm_add_epi32( n1[0], n1[8] ); + iSum = _mm_add_epi32( iSum, n1[0] ); } iSum = _mm_hadd_epi32( iSum, iSum ); @@ -1081,223 +875,174 @@ static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const in //working up to 12-bit -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD8x16_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { - __m128i m1[16], m2[16]; + __m128i m1[2][16], m2[2][16]; __m128i iSum = _mm_setzero_si128(); for( int k = 0; k < 16; k++ ) { - __m128i r0 = (sizeof( Torg ) > 1) ? (_mm_loadu_si128( (__m128i*)piOrg )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piOrg ), _mm_setzero_si128() )); - __m128i r1 = (sizeof( Tcur ) > 1) ? 
(_mm_lddqu_si128( (__m128i*)piCur )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piCur ), _mm_setzero_si128() )); // th _mm_loadu_si128( (__m128i*)piCur ) - m1[k] = _mm_sub_epi16( r0, r1 ); + __m128i r0 =_mm_loadu_si128( (__m128i*)piOrg ); + __m128i r1 =_mm_lddqu_si128( (__m128i*)piCur ); + m1[0][k] = _mm_sub_epi16( r0, r1 ); + m1[1][k] = _mm_cvtepi16_epi32( _mm_srli_si128( m1[0][k], 8 ) ); + m1[0][k] = _mm_cvtepi16_epi32( m1[0][k] ); piCur += iStrideCur; piOrg += iStrideOrg; } - // vertical - m2[0] = _mm_add_epi16( m1[0], m1[8] ); - m2[1] = _mm_add_epi16( m1[1], m1[9] ); - m2[2] = _mm_add_epi16( m1[2], m1[10] ); - m2[3] = _mm_add_epi16( m1[3], m1[11] ); - m2[4] = _mm_add_epi16( m1[4], m1[12] ); - m2[5] = _mm_add_epi16( m1[5], m1[13] ); - m2[6] = _mm_add_epi16( m1[6], m1[14] ); - m2[7] = _mm_add_epi16( m1[7], m1[15] ); - m2[8] = _mm_sub_epi16( m1[0], m1[8] ); - m2[9] = _mm_sub_epi16( m1[1], m1[9] ); - m2[10] = _mm_sub_epi16( m1[2], m1[10] ); - m2[11] = _mm_sub_epi16( m1[3], m1[11] ); - m2[12] = _mm_sub_epi16( m1[4], m1[12] ); - m2[13] = _mm_sub_epi16( m1[5], m1[13] ); - m2[14] = _mm_sub_epi16( m1[6], m1[14] ); - m2[15] = _mm_sub_epi16( m1[7], m1[15] ); - - m1[0] = _mm_add_epi16( m2[0], m2[4] ); - m1[1] = _mm_add_epi16( m2[1], m2[5] ); - m1[2] = _mm_add_epi16( m2[2], m2[6] ); - m1[3] = _mm_add_epi16( m2[3], m2[7] ); - m1[4] = _mm_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm_sub_epi16( m2[3], m2[7] ); - m1[8] = _mm_add_epi16( m2[8], m2[12] ); - m1[9] = _mm_add_epi16( m2[9], m2[13] ); - m1[10] = _mm_add_epi16( m2[10], m2[14] ); - m1[11] = _mm_add_epi16( m2[11], m2[15] ); - m1[12] = _mm_sub_epi16( m2[8], m2[12] ); - m1[13] = _mm_sub_epi16( m2[9], m2[13] ); - m1[14] = _mm_sub_epi16( m2[10], m2[14] ); - m1[15] = _mm_sub_epi16( m2[11], m2[15] ); + for( int i = 0; i < 2; i++ ) + { + // vertical + m2[i][ 0] = _mm_add_epi32( m1[i][ 0], m1[i][ 8] ); + m2[i][ 1] = _mm_add_epi32( m1[i][ 1], 
m1[i][ 9] ); + m2[i][ 2] = _mm_add_epi32( m1[i][ 2], m1[i][10] ); + m2[i][ 3] = _mm_add_epi32( m1[i][ 3], m1[i][11] ); + m2[i][ 4] = _mm_add_epi32( m1[i][ 4], m1[i][12] ); + m2[i][ 5] = _mm_add_epi32( m1[i][ 5], m1[i][13] ); + m2[i][ 6] = _mm_add_epi32( m1[i][ 6], m1[i][14] ); + m2[i][ 7] = _mm_add_epi32( m1[i][ 7], m1[i][15] ); + m2[i][ 8] = _mm_sub_epi32( m1[i][ 0], m1[i][ 8] ); + m2[i][ 9] = _mm_sub_epi32( m1[i][ 1], m1[i][ 9] ); + m2[i][10] = _mm_sub_epi32( m1[i][ 2], m1[i][10] ); + m2[i][11] = _mm_sub_epi32( m1[i][ 3], m1[i][11] ); + m2[i][12] = _mm_sub_epi32( m1[i][ 4], m1[i][12] ); + m2[i][13] = _mm_sub_epi32( m1[i][ 5], m1[i][13] ); + m2[i][14] = _mm_sub_epi32( m1[i][ 6], m1[i][14] ); + m2[i][15] = _mm_sub_epi32( m1[i][ 7], m1[i][15] ); + + m1[i][ 0] = _mm_add_epi32( m2[i][ 0], m2[i][ 4] ); + m1[i][ 1] = _mm_add_epi32( m2[i][ 1], m2[i][ 5] ); + m1[i][ 2] = _mm_add_epi32( m2[i][ 2], m2[i][ 6] ); + m1[i][ 3] = _mm_add_epi32( m2[i][ 3], m2[i][ 7] ); + m1[i][ 4] = _mm_sub_epi32( m2[i][ 0], m2[i][ 4] ); + m1[i][ 5] = _mm_sub_epi32( m2[i][ 1], m2[i][ 5] ); + m1[i][ 6] = _mm_sub_epi32( m2[i][ 2], m2[i][ 6] ); + m1[i][ 7] = _mm_sub_epi32( m2[i][ 3], m2[i][ 7] ); + m1[i][ 8] = _mm_add_epi32( m2[i][ 8], m2[i][12] ); + m1[i][ 9] = _mm_add_epi32( m2[i][ 9], m2[i][13] ); + m1[i][10] = _mm_add_epi32( m2[i][10], m2[i][14] ); + m1[i][11] = _mm_add_epi32( m2[i][11], m2[i][15] ); + m1[i][12] = _mm_sub_epi32( m2[i][ 8], m2[i][12] ); + m1[i][13] = _mm_sub_epi32( m2[i][ 9], m2[i][13] ); + m1[i][14] = _mm_sub_epi32( m2[i][10], m2[i][14] ); + m1[i][15] = _mm_sub_epi32( m2[i][11], m2[i][15] ); + + m2[i][ 0] = _mm_add_epi32( m1[i][ 0], m1[i][ 2] ); + m2[i][ 1] = _mm_add_epi32( m1[i][ 1], m1[i][ 3] ); + m2[i][ 2] = _mm_sub_epi32( m1[i][ 0], m1[i][ 2] ); + m2[i][ 3] = _mm_sub_epi32( m1[i][ 1], m1[i][ 3] ); + m2[i][ 4] = _mm_add_epi32( m1[i][ 4], m1[i][ 6] ); + m2[i][ 5] = _mm_add_epi32( m1[i][ 5], m1[i][ 7] ); + m2[i][ 6] = _mm_sub_epi32( m1[i][ 4], m1[i][ 6] ); + m2[i][ 7] = 
_mm_sub_epi32( m1[i][ 5], m1[i][ 7] ); + m2[i][ 8] = _mm_add_epi32( m1[i][ 8], m1[i][10] ); + m2[i][ 9] = _mm_add_epi32( m1[i][ 9], m1[i][11] ); + m2[i][10] = _mm_sub_epi32( m1[i][ 8], m1[i][10] ); + m2[i][11] = _mm_sub_epi32( m1[i][ 9], m1[i][11] ); + m2[i][12] = _mm_add_epi32( m1[i][12], m1[i][14] ); + m2[i][13] = _mm_add_epi32( m1[i][13], m1[i][15] ); + m2[i][14] = _mm_sub_epi32( m1[i][12], m1[i][14] ); + m2[i][15] = _mm_sub_epi32( m1[i][13], m1[i][15] ); + + m1[i][ 0] = _mm_add_epi32( m2[i][ 0], m2[i][ 1] ); + m1[i][ 1] = _mm_sub_epi32( m2[i][ 0], m2[i][ 1] ); + m1[i][ 2] = _mm_add_epi32( m2[i][ 2], m2[i][ 3] ); + m1[i][ 3] = _mm_sub_epi32( m2[i][ 2], m2[i][ 3] ); + m1[i][ 4] = _mm_add_epi32( m2[i][ 4], m2[i][ 5] ); + m1[i][ 5] = _mm_sub_epi32( m2[i][ 4], m2[i][ 5] ); + m1[i][ 6] = _mm_add_epi32( m2[i][ 6], m2[i][ 7] ); + m1[i][ 7] = _mm_sub_epi32( m2[i][ 6], m2[i][ 7] ); + m1[i][ 8] = _mm_add_epi32( m2[i][ 8], m2[i][ 9] ); + m1[i][ 9] = _mm_sub_epi32( m2[i][ 8], m2[i][ 9] ); + m1[i][10] = _mm_add_epi32( m2[i][10], m2[i][11] ); + m1[i][11] = _mm_sub_epi32( m2[i][10], m2[i][11] ); + m1[i][12] = _mm_add_epi32( m2[i][12], m2[i][13] ); + m1[i][13] = _mm_sub_epi32( m2[i][12], m2[i][13] ); + m1[i][14] = _mm_add_epi32( m2[i][14], m2[i][15] ); + m1[i][15] = _mm_sub_epi32( m2[i][14], m2[i][15] ); + } - m2[0] = _mm_add_epi16( m1[0], m1[2] ); - m2[1] = _mm_add_epi16( m1[1], m1[3] ); - m2[2] = _mm_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm_add_epi16( m1[4], m1[6] ); - m2[5] = _mm_add_epi16( m1[5], m1[7] ); - m2[6] = _mm_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm_sub_epi16( m1[5], m1[7] ); - m2[8] = _mm_add_epi16( m1[8], m1[10] ); - m2[9] = _mm_add_epi16( m1[9], m1[11] ); - m2[10] = _mm_sub_epi16( m1[8], m1[10] ); - m2[11] = _mm_sub_epi16( m1[9], m1[11] ); - m2[12] = _mm_add_epi16( m1[12], m1[14] ); - m2[13] = _mm_add_epi16( m1[13], m1[15] ); - m2[14] = _mm_sub_epi16( m1[12], m1[14] ); - m2[15] = _mm_sub_epi16( m1[13], m1[15] ); - - m1[ 
0] = _mm_add_epi16( m2[0], m2[1] ); - m1[ 1] = _mm_sub_epi16( m2[0], m2[1] ); - m1[ 2] = _mm_add_epi16( m2[2], m2[3] ); - m1[ 3] = _mm_sub_epi16( m2[2], m2[3] ); - m1[ 4] = _mm_add_epi16( m2[4], m2[5] ); - m1[ 5] = _mm_sub_epi16( m2[4], m2[5] ); - m1[ 6] = _mm_add_epi16( m2[6], m2[7] ); - m1[ 7] = _mm_sub_epi16( m2[6], m2[7] ); - m1[ 8] = _mm_add_epi16( m2[8], m2[9] ); - m1[ 9] = _mm_sub_epi16( m2[8], m2[9] ); - m1[10] = _mm_add_epi16( m2[10], m2[11] ); - m1[11] = _mm_sub_epi16( m2[10], m2[11] ); - m1[12] = _mm_add_epi16( m2[12], m2[13] ); - m1[13] = _mm_sub_epi16( m2[12], m2[13] ); - m1[14] = _mm_add_epi16( m2[14], m2[15] ); - m1[15] = _mm_sub_epi16( m2[14], m2[15] ); + // process horizontal in two steps ( 2 x 8x8 blocks ) + for( int l = 0; l < 4; l++ ) + { + int off = l * 4; - // process horizontal in two steps ( 2 x 8x8 blocks ) + for( int i = 0; i < 2; i++ ) + { + // transpose 4x4 + m2[i][0 + off] = _mm_unpacklo_epi32( m1[i][0 + off], m1[i][1 + off] ); + m2[i][1 + off] = _mm_unpackhi_epi32( m1[i][0 + off], m1[i][1 + off] ); + m2[i][2 + off] = _mm_unpacklo_epi32( m1[i][2 + off], m1[i][3 + off] ); + m2[i][3 + off] = _mm_unpackhi_epi32( m1[i][2 + off], m1[i][3 + off] ); + + m1[i][0 + off] = _mm_unpacklo_epi64( m2[i][0 + off], m2[i][2 + off] ); + m1[i][1 + off] = _mm_unpackhi_epi64( m2[i][0 + off], m2[i][2 + off] ); + m1[i][2 + off] = _mm_unpacklo_epi64( m2[i][1 + off], m2[i][3 + off] ); + m1[i][3 + off] = _mm_unpackhi_epi64( m2[i][1 + off], m2[i][3 + off] ); + } + } for( int l = 0; l < 2; l++ ) { int off = l * 8; - // transpose 8x8 - // blocks 0,1 and 2,3 - m2[0] = _mm_unpacklo_epi16( m1[0 + off], m1[1 + off] ); - m2[1] = _mm_unpacklo_epi16( m1[2 + off], m1[3 + off] ); - m2[2] = _mm_unpacklo_epi16( m1[4 + off], m1[5 + off] ); - m2[3] = _mm_unpacklo_epi16( m1[6 + off], m1[7 + off] ); - - m2[0 + 4] = _mm_unpackhi_epi16( m1[0 + off], m1[1 + off] ); - m2[1 + 4] = _mm_unpackhi_epi16( m1[2 + off], m1[3 + off] ); - m2[2 + 4] = _mm_unpackhi_epi16( m1[4 + off], m1[5 + off] 
); - m2[3 + 4] = _mm_unpackhi_epi16( m1[6 + off], m1[7 + off] ); + __m128i n1[2][8]; + __m128i n2[2][8]; - m1[0] = _mm_unpacklo_epi32( m2[0], m2[1] ); - m1[1] = _mm_unpackhi_epi32( m2[0], m2[1] ); - m1[2] = _mm_unpacklo_epi32( m2[2], m2[3] ); - m1[3] = _mm_unpackhi_epi32( m2[2], m2[3] ); - - m2[0] = _mm_unpacklo_epi64( m1[0], m1[2] ); - m2[1] = _mm_unpackhi_epi64( m1[0], m1[2] ); - m2[2] = _mm_unpacklo_epi64( m1[1], m1[3] ); - m2[3] = _mm_unpackhi_epi64( m1[1], m1[3] ); - - m1[0 + 4] = _mm_unpacklo_epi32( m2[0 + 4], m2[1 + 4] ); - m1[1 + 4] = _mm_unpackhi_epi32( m2[0 + 4], m2[1 + 4] ); - m1[2 + 4] = _mm_unpacklo_epi32( m2[2 + 4], m2[3 + 4] ); - m1[3 + 4] = _mm_unpackhi_epi32( m2[2 + 4], m2[3 + 4] ); - - m2[0 + 4] = _mm_unpacklo_epi64( m1[0 + 4], m1[2 + 4] ); - m2[1 + 4] = _mm_unpackhi_epi64( m1[0 + 4], m1[2 + 4] ); - m2[2 + 4] = _mm_unpacklo_epi64( m1[1 + 4], m1[3 + 4] ); - m2[3 + 4] = _mm_unpackhi_epi64( m1[1 + 4], m1[3 + 4] ); - - // horizontal calculation - if( iBitDepth >= 10 ) + for( int i = 0; i < 8; i++ ) { - __m128i n1[8][2]; - __m128i n2[8][2]; + int ii = i % 4; + int ij = i >> 2; - for( int i = 0; i < 8; i++ ) - { - n2[i][0] = _mm_cvtepi16_epi32( m2[i] ); - n2[i][1] = _mm_cvtepi16_epi32( _mm_shuffle_epi32( m2[i], 0xEE ) ); - } - - for( int i = 0; i < 2; i++ ) - { - n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] ); - n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] ); - n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] ); - n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] ); - n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] ); - n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] ); - n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] ); - n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] ); - - n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] ); - n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] ); - n2[2][i] = _mm_sub_epi32( n1[0][i], n1[2][i] ); - n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] ); - n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] ); - n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] ); - 
n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] ); - n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] ); - - n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) ); - n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) ); - n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) ); - n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) ); - n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) ); - n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) ); - n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) ); - n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) ); - } - for( int i = 0; i < 8; i++ ) - { - m1[i] = _mm_add_epi32( n1[i][0], n1[i][1] ); - } + n2[0][i] = m1[ij][off + ii ]; + n2[1][i] = m1[ij][off + ii + 4]; } - else + + for( int i = 0; i < 2; i++ ) { - m1[0] = _mm_add_epi16( m2[0], m2[4] ); - m1[1] = _mm_add_epi16( m2[1], m2[5] ); - m1[2] = _mm_add_epi16( m2[2], m2[6] ); - m1[3] = _mm_add_epi16( m2[3], m2[7] ); - m1[4] = _mm_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm_add_epi16( m1[0], m1[2] ); - m2[1] = _mm_add_epi16( m1[1], m1[3] ); - m2[2] = _mm_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm_add_epi16( m1[4], m1[6] ); - m2[5] = _mm_add_epi16( m1[5], m1[7] ); - m2[6] = _mm_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm_sub_epi16( m1[5], m1[7] ); - - m1[0] = _mm_abs_epi16( _mm_add_epi16( m2[0], m2[1] ) ); - m1[1] = _mm_abs_epi16( _mm_sub_epi16( m2[0], m2[1] ) ); - m1[2] = _mm_abs_epi16( _mm_add_epi16( m2[2], m2[3] ) ); - m1[3] = _mm_abs_epi16( _mm_sub_epi16( m2[2], m2[3] ) ); - m1[4] = _mm_abs_epi16( _mm_add_epi16( m2[4], m2[5] ) ); - m1[5] = _mm_abs_epi16( _mm_sub_epi16( m2[4], m2[5] ) ); - m1[6] = _mm_abs_epi16( _mm_add_epi16( m2[6], m2[7] ) ); - m1[7] = _mm_abs_epi16( _mm_sub_epi16( m2[6], m2[7] ) ); - - __m128i ma1, ma2; - __m128i vzero 
= _mm_setzero_si128(); + n1[i][0] = _mm_add_epi32( n2[i][0], n2[i][4] ); + n1[i][1] = _mm_add_epi32( n2[i][1], n2[i][5] ); + n1[i][2] = _mm_add_epi32( n2[i][2], n2[i][6] ); + n1[i][3] = _mm_add_epi32( n2[i][3], n2[i][7] ); + n1[i][4] = _mm_sub_epi32( n2[i][0], n2[i][4] ); + n1[i][5] = _mm_sub_epi32( n2[i][1], n2[i][5] ); + n1[i][6] = _mm_sub_epi32( n2[i][2], n2[i][6] ); + n1[i][7] = _mm_sub_epi32( n2[i][3], n2[i][7] ); + + n2[i][0] = _mm_add_epi32( n1[i][0], n1[i][2] ); + n2[i][1] = _mm_add_epi32( n1[i][1], n1[i][3] ); + n2[i][2] = _mm_sub_epi32( n1[i][0], n1[i][2] ); + n2[i][3] = _mm_sub_epi32( n1[i][1], n1[i][3] ); + n2[i][4] = _mm_add_epi32( n1[i][4], n1[i][6] ); + n2[i][5] = _mm_add_epi32( n1[i][5], n1[i][7] ); + n2[i][6] = _mm_sub_epi32( n1[i][4], n1[i][6] ); + n2[i][7] = _mm_sub_epi32( n1[i][5], n1[i][7] ); + + n1[i][0] = _mm_abs_epi32( _mm_add_epi32( n2[i][0], n2[i][1] ) ); + n1[i][1] = _mm_abs_epi32( _mm_sub_epi32( n2[i][0], n2[i][1] ) ); + n1[i][2] = _mm_abs_epi32( _mm_add_epi32( n2[i][2], n2[i][3] ) ); + n1[i][3] = _mm_abs_epi32( _mm_sub_epi32( n2[i][2], n2[i][3] ) ); + n1[i][4] = _mm_abs_epi32( _mm_add_epi32( n2[i][4], n2[i][5] ) ); + n1[i][5] = _mm_abs_epi32( _mm_sub_epi32( n2[i][4], n2[i][5] ) ); + n1[i][6] = _mm_abs_epi32( _mm_add_epi32( n2[i][6], n2[i][7] ) ); + n1[i][7] = _mm_abs_epi32( _mm_sub_epi32( n2[i][6], n2[i][7] ) ); + } - for( int i = 0; i < 8; i++ ) - { - ma1 = _mm_unpacklo_epi16( m1[i], vzero ); - ma2 = _mm_unpackhi_epi16( m1[i], vzero ); - m1[i] = _mm_add_epi32( ma1, ma2 ); - } + for( int i = 0; i < 8; i++ ) + { + n2[0][i] = _mm_add_epi32( n1[0][i], n1[1][i] ); } - m1[0] = _mm_add_epi32( m1[0], m1[1] ); - m1[2] = _mm_add_epi32( m1[2], m1[3] ); - m1[4] = _mm_add_epi32( m1[4], m1[5] ); - m1[6] = _mm_add_epi32( m1[6], m1[7] ); + n2[0][0] = _mm_add_epi32( n2[0][0], n2[0][1] ); + n2[0][2] = _mm_add_epi32( n2[0][2], n2[0][3] ); + n2[0][4] = _mm_add_epi32( n2[0][4], n2[0][5] ); + n2[0][6] = _mm_add_epi32( n2[0][6], n2[0][7] ); - m1[0] = 
_mm_add_epi32( m1[0], m1[2] ); - m1[4] = _mm_add_epi32( m1[4], m1[6] ); - iSum = _mm_add_epi32( iSum, _mm_add_epi32( m1[0], m1[4] ) ); + n2[0][0] = _mm_add_epi32( n2[0][0], n2[0][2] ); + n2[0][4] = _mm_add_epi32( n2[0][4], n2[0][6] ); + iSum = _mm_add_epi32( iSum, _mm_add_epi32( n2[0][0], n2[0][4] ) ); } iSum = _mm_hadd_epi32( iSum, iSum ); @@ -1453,7 +1198,6 @@ static uint32_t xCalcHAD8x4_SSE( const Torg *piOrg, const Tcur *piCur, const int } -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD4x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { __m128i m1[8], m2[8]; @@ -1585,182 +1329,154 @@ static uint32_t xCalcHAD4x8_SSE( const Torg *piOrg, const Tcur *piCur, const int } -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD16x16_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { uint32_t sad = 0; #ifdef USE_AVX2 - // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 ); const int iLoops = 2; - __m256i m1[8], m2[8]; + __m256i m1[2][8], m2[2][8]; for( int l = 0; l < iLoops; l++ ) { { for( int k = 0; k < 8; k++ ) { - __m256i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )piOrg ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )piOrg ) ), 0xD8 ), _mm256_setzero_si256() ) ); - __m256i r1 = ( sizeof( Tcur ) > 1 ) ? 
( _mm256_lddqu_si256( ( __m256i* )piCur ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )piCur ) ), 0xD8 ), _mm256_setzero_si256() ) ); - m2[k] = _mm256_sub_epi16( r0, r1 ); + __m256i r0 = _mm256_lddqu_si256( ( __m256i* ) piOrg ); + __m256i r1 = _mm256_lddqu_si256( ( __m256i* ) piCur ); + m2[0][k] = _mm256_sub_epi16( r0, r1 ); + m2[1][k] = _mm256_cvtepi16_epi32( _mm256_extracti128_si256( m2[0][k], 1 ) ); + m2[0][k] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[0][k] ) ); piCur += iStrideCur; piOrg += iStrideOrg; } } - // horizontal - - m1[0] = _mm256_add_epi16( m2[0], m2[4] ); - m1[1] = _mm256_add_epi16( m2[1], m2[5] ); - m1[2] = _mm256_add_epi16( m2[2], m2[6] ); - m1[3] = _mm256_add_epi16( m2[3], m2[7] ); - m1[4] = _mm256_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm256_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm256_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm256_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm256_add_epi16( m1[0], m1[2] ); - m2[1] = _mm256_add_epi16( m1[1], m1[3] ); - m2[2] = _mm256_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm256_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm256_add_epi16( m1[4], m1[6] ); - m2[5] = _mm256_add_epi16( m1[5], m1[7] ); - m2[6] = _mm256_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm256_sub_epi16( m1[5], m1[7] ); - - m1[0] = _mm256_add_epi16( m2[0], m2[1] ); - m1[1] = _mm256_sub_epi16( m2[0], m2[1] ); - m1[2] = _mm256_add_epi16( m2[2], m2[3] ); - m1[3] = _mm256_sub_epi16( m2[2], m2[3] ); - m1[4] = _mm256_add_epi16( m2[4], m2[5] ); - m1[5] = _mm256_sub_epi16( m2[4], m2[5] ); - m1[6] = _mm256_add_epi16( m2[6], m2[7] ); - m1[7] = _mm256_sub_epi16( m2[6], m2[7] ); - - // transpose 2 8x8 blocks in parallel - - m2[0] = _mm256_unpacklo_epi16( m1[0], m1[1] ); - m2[1] = _mm256_unpacklo_epi16( m1[2], m1[3] ); - m2[2] = _mm256_unpacklo_epi16( m1[4], m1[5] ); - m2[3] = _mm256_unpacklo_epi16( m1[6], m1[7] ); - m2[4] = _mm256_unpackhi_epi16( m1[0], m1[1] ); - m2[5] = _mm256_unpackhi_epi16( m1[2], 
m1[3] ); - m2[6] = _mm256_unpackhi_epi16( m1[4], m1[5] ); - m2[7] = _mm256_unpackhi_epi16( m1[6], m1[7] ); + constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 ); + constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 ); - m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] ); - m1[1] = _mm256_unpackhi_epi32( m2[0], m2[1] ); - m1[2] = _mm256_unpacklo_epi32( m2[2], m2[3] ); - m1[3] = _mm256_unpackhi_epi32( m2[2], m2[3] ); - m1[4] = _mm256_unpacklo_epi32( m2[4], m2[5] ); - m1[5] = _mm256_unpackhi_epi32( m2[4], m2[5] ); - m1[6] = _mm256_unpacklo_epi32( m2[6], m2[7] ); - m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] ); - - m2[0] = _mm256_unpacklo_epi64( m1[0], m1[2] ); - m2[1] = _mm256_unpackhi_epi64( m1[0], m1[2] ); - m2[2] = _mm256_unpacklo_epi64( m1[1], m1[3] ); - m2[3] = _mm256_unpackhi_epi64( m1[1], m1[3] ); - m2[4] = _mm256_unpacklo_epi64( m1[4], m1[6] ); - m2[5] = _mm256_unpackhi_epi64( m1[4], m1[6] ); - m2[6] = _mm256_unpacklo_epi64( m1[5], m1[7] ); - m2[7] = _mm256_unpackhi_epi64( m1[5], m1[7] ); - - // vertical - if( iBitDepth >= 10 ) + for( int i = 0; i < 2; i++ ) { - __m256i n1[8][2]; - __m256i n2[8][2]; + m1[i][0] = _mm256_add_epi32( m2[i][0], m2[i][4] ); + m1[i][1] = _mm256_add_epi32( m2[i][1], m2[i][5] ); + m1[i][2] = _mm256_add_epi32( m2[i][2], m2[i][6] ); + m1[i][3] = _mm256_add_epi32( m2[i][3], m2[i][7] ); + m1[i][4] = _mm256_sub_epi32( m2[i][0], m2[i][4] ); + m1[i][5] = _mm256_sub_epi32( m2[i][1], m2[i][5] ); + m1[i][6] = _mm256_sub_epi32( m2[i][2], m2[i][6] ); + m1[i][7] = _mm256_sub_epi32( m2[i][3], m2[i][7] ); + + m2[i][0] = _mm256_add_epi32( m1[i][0], m1[i][2] ); + m2[i][1] = _mm256_add_epi32( m1[i][1], m1[i][3] ); + m2[i][2] = _mm256_sub_epi32( m1[i][0], m1[i][2] ); + m2[i][3] = _mm256_sub_epi32( m1[i][1], m1[i][3] ); + m2[i][4] = _mm256_add_epi32( m1[i][4], m1[i][6] ); + m2[i][5] = _mm256_add_epi32( m1[i][5], m1[i][7] ); + m2[i][6] = _mm256_sub_epi32( m1[i][4], m1[i][6] ); + m2[i][7] = _mm256_sub_epi32( m1[i][5], m1[i][7] ); + + m1[i][0] = 
_mm256_add_epi32( m2[i][0], m2[i][1] ); + m1[i][1] = _mm256_sub_epi32( m2[i][0], m2[i][1] ); + m1[i][2] = _mm256_add_epi32( m2[i][2], m2[i][3] ); + m1[i][3] = _mm256_sub_epi32( m2[i][2], m2[i][3] ); + m1[i][4] = _mm256_add_epi32( m2[i][4], m2[i][5] ); + m1[i][5] = _mm256_sub_epi32( m2[i][4], m2[i][5] ); + m1[i][6] = _mm256_add_epi32( m2[i][6], m2[i][7] ); + m1[i][7] = _mm256_sub_epi32( m2[i][6], m2[i][7] ); + + // transpose + // 8x8 + m2[i][0] = _mm256_unpacklo_epi32( m1[i][0], m1[i][1] ); + m2[i][1] = _mm256_unpacklo_epi32( m1[i][2], m1[i][3] ); + m2[i][2] = _mm256_unpacklo_epi32( m1[i][4], m1[i][5] ); + m2[i][3] = _mm256_unpacklo_epi32( m1[i][6], m1[i][7] ); + m2[i][4] = _mm256_unpackhi_epi32( m1[i][0], m1[i][1] ); + m2[i][5] = _mm256_unpackhi_epi32( m1[i][2], m1[i][3] ); + m2[i][6] = _mm256_unpackhi_epi32( m1[i][4], m1[i][5] ); + m2[i][7] = _mm256_unpackhi_epi32( m1[i][6], m1[i][7] ); + + m1[i][0] = _mm256_unpacklo_epi64( m2[i][0], m2[i][1] ); + m1[i][1] = _mm256_unpackhi_epi64( m2[i][0], m2[i][1] ); + m1[i][2] = _mm256_unpacklo_epi64( m2[i][2], m2[i][3] ); + m1[i][3] = _mm256_unpackhi_epi64( m2[i][2], m2[i][3] ); + m1[i][4] = _mm256_unpacklo_epi64( m2[i][4], m2[i][5] ); + m1[i][5] = _mm256_unpackhi_epi64( m2[i][4], m2[i][5] ); + m1[i][6] = _mm256_unpacklo_epi64( m2[i][6], m2[i][7] ); + m1[i][7] = _mm256_unpackhi_epi64( m2[i][6], m2[i][7] ); + + m2[i][0] = _mm256_permute2x128_si256( m1[i][0], m1[i][2], perm_unpacklo_epi128 ); + m2[i][1] = _mm256_permute2x128_si256( m1[i][0], m1[i][2], perm_unpackhi_epi128 ); + m2[i][2] = _mm256_permute2x128_si256( m1[i][1], m1[i][3], perm_unpacklo_epi128 ); + m2[i][3] = _mm256_permute2x128_si256( m1[i][1], m1[i][3], perm_unpackhi_epi128 ); + m2[i][4] = _mm256_permute2x128_si256( m1[i][4], m1[i][6], perm_unpacklo_epi128 ); + m2[i][5] = _mm256_permute2x128_si256( m1[i][4], m1[i][6], perm_unpackhi_epi128 ); + m2[i][6] = _mm256_permute2x128_si256( m1[i][5], m1[i][7], perm_unpacklo_epi128 ); + m2[i][7] = _mm256_permute2x128_si256( 
m1[i][5], m1[i][7], perm_unpackhi_epi128 ); + } - for( int i = 0; i < 8; i++ ) - { - n2[i][0] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( _mm256_permute4x64_epi64( m2[i], 0xD8 ) ) ); - n2[i][1] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( _mm256_permute4x64_epi64( m2[i], 0x8D ) ) ); - } + m1[0][0] = _mm256_permute2x128_si256( m2[0][0], m2[1][0], perm_unpacklo_epi128 ); + m1[0][1] = _mm256_permute2x128_si256( m2[0][1], m2[1][1], perm_unpacklo_epi128 ); + m1[0][2] = _mm256_permute2x128_si256( m2[0][2], m2[1][2], perm_unpacklo_epi128 ); + m1[0][3] = _mm256_permute2x128_si256( m2[0][3], m2[1][3], perm_unpacklo_epi128 ); + m1[0][4] = _mm256_permute2x128_si256( m2[0][4], m2[1][4], perm_unpacklo_epi128 ); + m1[0][5] = _mm256_permute2x128_si256( m2[0][5], m2[1][5], perm_unpacklo_epi128 ); + m1[0][6] = _mm256_permute2x128_si256( m2[0][6], m2[1][6], perm_unpacklo_epi128 ); + m1[0][7] = _mm256_permute2x128_si256( m2[0][7], m2[1][7], perm_unpacklo_epi128 ); + + m1[1][0] = _mm256_permute2x128_si256( m2[0][0], m2[1][0], perm_unpackhi_epi128 ); + m1[1][1] = _mm256_permute2x128_si256( m2[0][1], m2[1][1], perm_unpackhi_epi128 ); + m1[1][2] = _mm256_permute2x128_si256( m2[0][2], m2[1][2], perm_unpackhi_epi128 ); + m1[1][3] = _mm256_permute2x128_si256( m2[0][3], m2[1][3], perm_unpackhi_epi128 ); + m1[1][4] = _mm256_permute2x128_si256( m2[0][4], m2[1][4], perm_unpackhi_epi128 ); + m1[1][5] = _mm256_permute2x128_si256( m2[0][5], m2[1][5], perm_unpackhi_epi128 ); + m1[1][6] = _mm256_permute2x128_si256( m2[0][6], m2[1][6], perm_unpackhi_epi128 ); + m1[1][7] = _mm256_permute2x128_si256( m2[0][7], m2[1][7], perm_unpackhi_epi128 ); - for( int i = 0; i < 2; i++ ) - { - n1[0][i] = _mm256_add_epi32( n2[0][i], n2[4][i] ); - n1[1][i] = _mm256_add_epi32( n2[1][i], n2[5][i] ); - n1[2][i] = _mm256_add_epi32( n2[2][i], n2[6][i] ); - n1[3][i] = _mm256_add_epi32( n2[3][i], n2[7][i] ); - n1[4][i] = _mm256_sub_epi32( n2[0][i], n2[4][i] ); - n1[5][i] = _mm256_sub_epi32( n2[1][i], n2[5][i] ); - 
n1[6][i] = _mm256_sub_epi32( n2[2][i], n2[6][i] ); - n1[7][i] = _mm256_sub_epi32( n2[3][i], n2[7][i] ); - - n2[0][i] = _mm256_add_epi32( n1[0][i], n1[2][i] ); - n2[1][i] = _mm256_add_epi32( n1[1][i], n1[3][i] ); - n2[2][i] = _mm256_sub_epi32( n1[0][i], n1[2][i] ); - n2[3][i] = _mm256_sub_epi32( n1[1][i], n1[3][i] ); - n2[4][i] = _mm256_add_epi32( n1[4][i], n1[6][i] ); - n2[5][i] = _mm256_add_epi32( n1[5][i], n1[7][i] ); - n2[6][i] = _mm256_sub_epi32( n1[4][i], n1[6][i] ); - n2[7][i] = _mm256_sub_epi32( n1[5][i], n1[7][i] ); - - n1[0][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[0][i], n2[1][i] ) ); - n1[1][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[0][i], n2[1][i] ) ); - n1[2][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[2][i], n2[3][i] ) ); - n1[3][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[2][i], n2[3][i] ) ); - n1[4][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[4][i], n2[5][i] ) ); - n1[5][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[4][i], n2[5][i] ) ); - n1[6][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[6][i], n2[7][i] ) ); - n1[7][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[6][i], n2[7][i] ) ); - } - for( int i = 0; i < 8; i++ ) - { - m1[i] = _mm256_add_epi32( n1[i][0], n1[i][1] ); - } - } - else + for( int i = 0; i < 2; i++ ) { - m1[0] = _mm256_add_epi16( m2[0], m2[4] ); - m1[1] = _mm256_add_epi16( m2[1], m2[5] ); - m1[2] = _mm256_add_epi16( m2[2], m2[6] ); - m1[3] = _mm256_add_epi16( m2[3], m2[7] ); - m1[4] = _mm256_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm256_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm256_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm256_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm256_add_epi16( m1[0], m1[2] ); - m2[1] = _mm256_add_epi16( m1[1], m1[3] ); - m2[2] = _mm256_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm256_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm256_add_epi16( m1[4], m1[6] ); - m2[5] = _mm256_add_epi16( m1[5], m1[7] ); - m2[6] = _mm256_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm256_sub_epi16( m1[5], m1[7] ); - - m1[0] = _mm256_abs_epi16( 
_mm256_add_epi16( m2[0], m2[1] ) ); - m1[1] = _mm256_abs_epi16( _mm256_sub_epi16( m2[0], m2[1] ) ); - m1[2] = _mm256_abs_epi16( _mm256_add_epi16( m2[2], m2[3] ) ); - m1[3] = _mm256_abs_epi16( _mm256_sub_epi16( m2[2], m2[3] ) ); - m1[4] = _mm256_abs_epi16( _mm256_add_epi16( m2[4], m2[5] ) ); - m1[5] = _mm256_abs_epi16( _mm256_sub_epi16( m2[4], m2[5] ) ); - m1[6] = _mm256_abs_epi16( _mm256_add_epi16( m2[6], m2[7] ) ); - m1[7] = _mm256_abs_epi16( _mm256_sub_epi16( m2[6], m2[7] ) ); - - __m256i ma1, ma2; - __m256i vzero = _mm256_setzero_si256(); + m2[i][0] = _mm256_add_epi32( m1[i][0], m1[i][4] ); + m2[i][1] = _mm256_add_epi32( m1[i][1], m1[i][5] ); + m2[i][2] = _mm256_add_epi32( m1[i][2], m1[i][6] ); + m2[i][3] = _mm256_add_epi32( m1[i][3], m1[i][7] ); + m2[i][4] = _mm256_sub_epi32( m1[i][0], m1[i][4] ); + m2[i][5] = _mm256_sub_epi32( m1[i][1], m1[i][5] ); + m2[i][6] = _mm256_sub_epi32( m1[i][2], m1[i][6] ); + m2[i][7] = _mm256_sub_epi32( m1[i][3], m1[i][7] ); + + m1[i][0] = _mm256_add_epi32( m2[i][0], m2[i][2] ); + m1[i][1] = _mm256_add_epi32( m2[i][1], m2[i][3] ); + m1[i][2] = _mm256_sub_epi32( m2[i][0], m2[i][2] ); + m1[i][3] = _mm256_sub_epi32( m2[i][1], m2[i][3] ); + m1[i][4] = _mm256_add_epi32( m2[i][4], m2[i][6] ); + m1[i][5] = _mm256_add_epi32( m2[i][5], m2[i][7] ); + m1[i][6] = _mm256_sub_epi32( m2[i][4], m2[i][6] ); + m1[i][7] = _mm256_sub_epi32( m2[i][5], m2[i][7] ); + + m2[i][0] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][0], m1[i][1] ) ); + m2[i][1] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][0], m1[i][1] ) ); + m2[i][2] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][2], m1[i][3] ) ); + m2[i][3] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][2], m1[i][3] ) ); + m2[i][4] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][4], m1[i][5] ) ); + m2[i][5] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][4], m1[i][5] ) ); + m2[i][6] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][6], m1[i][7] ) ); + m2[i][7] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][6], m1[i][7] ) ); + } - for( int 
i = 0; i < 8; i++ ) - { - ma1 = _mm256_unpacklo_epi16( m1[i], vzero ); - ma2 = _mm256_unpackhi_epi16( m1[i], vzero ); - m1[i] = _mm256_add_epi32( ma1, ma2 ); - } + for( int i = 0; i < 8; i++ ) + { + m1[0][i] = _mm256_add_epi32( m2[0][i], m2[1][i] ); } - m1[0] = _mm256_add_epi32( m1[0], m1[1] ); - m1[2] = _mm256_add_epi32( m1[2], m1[3] ); - m1[4] = _mm256_add_epi32( m1[4], m1[5] ); - m1[6] = _mm256_add_epi32( m1[6], m1[7] ); + m1[0][0] = _mm256_add_epi32( m1[0][0], m1[0][1] ); + m1[0][2] = _mm256_add_epi32( m1[0][2], m1[0][3] ); + m1[0][4] = _mm256_add_epi32( m1[0][4], m1[0][5] ); + m1[0][6] = _mm256_add_epi32( m1[0][6], m1[0][7] ); - m1[0] = _mm256_add_epi32( m1[0], m1[2] ); - m1[4] = _mm256_add_epi32( m1[4], m1[6] ); + m1[0][0] = _mm256_add_epi32( m1[0][0], m1[0][2] ); + m1[0][4] = _mm256_add_epi32( m1[0][4], m1[0][6] ); + + __m256i iSum = _mm256_add_epi32( m1[0][0], m1[0][4] ); - __m256i iSum = _mm256_add_epi32( m1[0], m1[4] ); iSum = _mm256_hadd_epi32( iSum, iSum ); iSum = _mm256_hadd_epi32( iSum, iSum ); @@ -1778,103 +1494,143 @@ static uint32_t xCalcHAD16x16_AVX2( const Torg *piOrg, const Tcur *piCur, const return ( sad ); } -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > static uint32_t xCalcHAD16x8_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { uint32_t sad = 0; #ifdef USE_AVX2 - // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 ); - //const int iLoops = 1; __m256i m1[16], m2[16]; - __m256i vzero = _mm256_setzero_si256(); - - //for( int l = 0; l < iLoops; l++ ) { { for( int k = 0; k < 8; k++ ) { - __m256i r0 = (sizeof( Torg ) > 1) ? (_mm256_lddqu_si256( (__m256i*)piOrg )) : (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piOrg ) ), vzero )); - __m256i r1 = (sizeof( Tcur ) > 1) ? 
(_mm256_lddqu_si256( (__m256i*)piCur )) : (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piCur ) ), vzero )); - m1[k] = _mm256_sub_epi16( r0, r1 ); + __m256i r0 = _mm256_lddqu_si256( (__m256i*)piOrg ); + __m256i r1 = _mm256_lddqu_si256( (__m256i*)piCur ); + m1[k] = _mm256_sub_epi16( r0, r1 ); + m1[k+8] = _mm256_cvtepi16_epi32( _mm256_extracti128_si256( m1[k], 1 ) ); + m1[k] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128 ( m1[k] ) ); piCur += iStrideCur; piOrg += iStrideOrg; } } - // vertical - m2[0] = _mm256_add_epi16( m1[0], m1[4] ); - m2[1] = _mm256_add_epi16( m1[1], m1[5] ); - m2[2] = _mm256_add_epi16( m1[2], m1[6] ); - m2[3] = _mm256_add_epi16( m1[3], m1[7] ); - m2[4] = _mm256_sub_epi16( m1[0], m1[4] ); - m2[5] = _mm256_sub_epi16( m1[1], m1[5] ); - m2[6] = _mm256_sub_epi16( m1[2], m1[6] ); - m2[7] = _mm256_sub_epi16( m1[3], m1[7] ); - - m1[0] = _mm256_add_epi16( m2[0], m2[2] ); - m1[1] = _mm256_add_epi16( m2[1], m2[3] ); - m1[2] = _mm256_sub_epi16( m2[0], m2[2] ); - m1[3] = _mm256_sub_epi16( m2[1], m2[3] ); - m1[4] = _mm256_add_epi16( m2[4], m2[6] ); - m1[5] = _mm256_add_epi16( m2[5], m2[7] ); - m1[6] = _mm256_sub_epi16( m2[4], m2[6] ); - m1[7] = _mm256_sub_epi16( m2[5], m2[7] ); - - m2[0] = _mm256_add_epi16( m1[0], m1[1] ); - m2[1] = _mm256_sub_epi16( m1[0], m1[1] ); - m2[2] = _mm256_add_epi16( m1[2], m1[3] ); - m2[3] = _mm256_sub_epi16( m1[2], m1[3] ); - m2[4] = _mm256_add_epi16( m1[4], m1[5] ); - m2[5] = _mm256_sub_epi16( m1[4], m1[5] ); - m2[6] = _mm256_add_epi16( m1[6], m1[7] ); - m2[7] = _mm256_sub_epi16( m1[6], m1[7] ); + // vertical, first 8x8 + m2[0] = _mm256_add_epi32( m1[0], m1[4] ); + m2[1] = _mm256_add_epi32( m1[1], m1[5] ); + m2[2] = _mm256_add_epi32( m1[2], m1[6] ); + m2[3] = _mm256_add_epi32( m1[3], m1[7] ); + m2[4] = _mm256_sub_epi32( m1[0], m1[4] ); + m2[5] = _mm256_sub_epi32( m1[1], m1[5] ); + m2[6] = _mm256_sub_epi32( m1[2], m1[6] ); + m2[7] = _mm256_sub_epi32( m1[3], m1[7] ); + + m1[0] = _mm256_add_epi32( m2[0], 
m2[2] ); + m1[1] = _mm256_add_epi32( m2[1], m2[3] ); + m1[2] = _mm256_sub_epi32( m2[0], m2[2] ); + m1[3] = _mm256_sub_epi32( m2[1], m2[3] ); + m1[4] = _mm256_add_epi32( m2[4], m2[6] ); + m1[5] = _mm256_add_epi32( m2[5], m2[7] ); + m1[6] = _mm256_sub_epi32( m2[4], m2[6] ); + m1[7] = _mm256_sub_epi32( m2[5], m2[7] ); + + m2[0] = _mm256_add_epi32( m1[0], m1[1] ); + m2[1] = _mm256_sub_epi32( m1[0], m1[1] ); + m2[2] = _mm256_add_epi32( m1[2], m1[3] ); + m2[3] = _mm256_sub_epi32( m1[2], m1[3] ); + m2[4] = _mm256_add_epi32( m1[4], m1[5] ); + m2[5] = _mm256_sub_epi32( m1[4], m1[5] ); + m2[6] = _mm256_add_epi32( m1[6], m1[7] ); + m2[7] = _mm256_sub_epi32( m1[6], m1[7] ); + + // vertical, second 8x8 + m2[8+0] = _mm256_add_epi32( m1[8+0], m1[8+4] ); + m2[8+1] = _mm256_add_epi32( m1[8+1], m1[8+5] ); + m2[8+2] = _mm256_add_epi32( m1[8+2], m1[8+6] ); + m2[8+3] = _mm256_add_epi32( m1[8+3], m1[8+7] ); + m2[8+4] = _mm256_sub_epi32( m1[8+0], m1[8+4] ); + m2[8+5] = _mm256_sub_epi32( m1[8+1], m1[8+5] ); + m2[8+6] = _mm256_sub_epi32( m1[8+2], m1[8+6] ); + m2[8+7] = _mm256_sub_epi32( m1[8+3], m1[8+7] ); + + m1[8+0] = _mm256_add_epi32( m2[8+0], m2[8+2] ); + m1[8+1] = _mm256_add_epi32( m2[8+1], m2[8+3] ); + m1[8+2] = _mm256_sub_epi32( m2[8+0], m2[8+2] ); + m1[8+3] = _mm256_sub_epi32( m2[8+1], m2[8+3] ); + m1[8+4] = _mm256_add_epi32( m2[8+4], m2[8+6] ); + m1[8+5] = _mm256_add_epi32( m2[8+5], m2[8+7] ); + m1[8+6] = _mm256_sub_epi32( m2[8+4], m2[8+6] ); + m1[8+7] = _mm256_sub_epi32( m2[8+5], m2[8+7] ); + + m2[8+0] = _mm256_add_epi32( m1[8+0], m1[8+1] ); + m2[8+1] = _mm256_sub_epi32( m1[8+0], m1[8+1] ); + m2[8+2] = _mm256_add_epi32( m1[8+2], m1[8+3] ); + m2[8+3] = _mm256_sub_epi32( m1[8+2], m1[8+3] ); + m2[8+4] = _mm256_add_epi32( m1[8+4], m1[8+5] ); + m2[8+5] = _mm256_sub_epi32( m1[8+4], m1[8+5] ); + m2[8+6] = _mm256_add_epi32( m1[8+6], m1[8+7] ); + m2[8+7] = _mm256_sub_epi32( m1[8+6], m1[8+7] ); // transpose - m1[0] = _mm256_unpacklo_epi16( m2[0], m2[1] ); - m1[1] = _mm256_unpacklo_epi16( 
m2[2], m2[3] ); - m1[2] = _mm256_unpacklo_epi16( m2[4], m2[5] ); - m1[3] = _mm256_unpacklo_epi16( m2[6], m2[7] ); - m1[4] = _mm256_unpackhi_epi16( m2[0], m2[1] ); - m1[5] = _mm256_unpackhi_epi16( m2[2], m2[3] ); - m1[6] = _mm256_unpackhi_epi16( m2[4], m2[5] ); - m1[7] = _mm256_unpackhi_epi16( m2[6], m2[7] ); + constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 ); + constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 ); - m2[0] = _mm256_unpacklo_epi32( m1[0], m1[1] ); - m2[1] = _mm256_unpackhi_epi32( m1[0], m1[1] ); - m2[2] = _mm256_unpacklo_epi32( m1[2], m1[3] ); - m2[3] = _mm256_unpackhi_epi32( m1[2], m1[3] ); - m2[4] = _mm256_unpacklo_epi32( m1[4], m1[5] ); - m2[5] = _mm256_unpackhi_epi32( m1[4], m1[5] ); - m2[6] = _mm256_unpacklo_epi32( m1[6], m1[7] ); - m2[7] = _mm256_unpackhi_epi32( m1[6], m1[7] ); + m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] ); + m1[1] = _mm256_unpacklo_epi32( m2[2], m2[3] ); + m1[2] = _mm256_unpacklo_epi32( m2[4], m2[5] ); + m1[3] = _mm256_unpacklo_epi32( m2[6], m2[7] ); + m1[4] = _mm256_unpackhi_epi32( m2[0], m2[1] ); + m1[5] = _mm256_unpackhi_epi32( m2[2], m2[3] ); + m1[6] = _mm256_unpackhi_epi32( m2[4], m2[5] ); + m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] ); - m1[0] = _mm256_unpacklo_epi64( m2[0], m2[2] ); - m1[1] = _mm256_unpackhi_epi64( m2[0], m2[2] ); - m1[2] = _mm256_unpacklo_epi64( m2[1], m2[3] ); - m1[3] = _mm256_unpackhi_epi64( m2[1], m2[3] ); - m1[4] = _mm256_unpacklo_epi64( m2[4], m2[6] ); - m1[5] = _mm256_unpackhi_epi64( m2[4], m2[6] ); - m1[6] = _mm256_unpacklo_epi64( m2[5], m2[7] ); - m1[7] = _mm256_unpackhi_epi64( m2[5], m2[7] ); - m1[8] = _mm256_permute2x128_si256( m1[0], vzero, 0x31 ); - m1[9] = _mm256_permute2x128_si256( m1[1], vzero, 0x31 ); - m1[10] = _mm256_permute2x128_si256( m1[2], vzero, 0x31 ); - m1[11] = _mm256_permute2x128_si256( m1[3], vzero, 0x31 ); - m1[12] = _mm256_permute2x128_si256( m1[4], vzero, 0x31 ); - m1[13] = _mm256_permute2x128_si256( m1[5], vzero, 0x31 ); - m1[14] = 
_mm256_permute2x128_si256( m1[6], vzero, 0x31 ); - m1[15] = _mm256_permute2x128_si256( m1[7], vzero, 0x31 ); + m2[0] = _mm256_unpacklo_epi64( m1[0], m1[1] ); + m2[1] = _mm256_unpackhi_epi64( m1[0], m1[1] ); + m2[2] = _mm256_unpacklo_epi64( m1[2], m1[3] ); + m2[3] = _mm256_unpackhi_epi64( m1[2], m1[3] ); + m2[4] = _mm256_unpacklo_epi64( m1[4], m1[5] ); + m2[5] = _mm256_unpackhi_epi64( m1[4], m1[5] ); + m2[6] = _mm256_unpacklo_epi64( m1[6], m1[7] ); + m2[7] = _mm256_unpackhi_epi64( m1[6], m1[7] ); + + m1[0] = _mm256_permute2x128_si256( m2[0], m2[2], perm_unpacklo_epi128 ); + m1[1] = _mm256_permute2x128_si256( m2[0], m2[2], perm_unpackhi_epi128 ); + m1[2] = _mm256_permute2x128_si256( m2[1], m2[3], perm_unpacklo_epi128 ); + m1[3] = _mm256_permute2x128_si256( m2[1], m2[3], perm_unpackhi_epi128 ); + m1[4] = _mm256_permute2x128_si256( m2[4], m2[6], perm_unpacklo_epi128 ); + m1[5] = _mm256_permute2x128_si256( m2[4], m2[6], perm_unpackhi_epi128 ); + m1[6] = _mm256_permute2x128_si256( m2[5], m2[7], perm_unpacklo_epi128 ); + m1[7] = _mm256_permute2x128_si256( m2[5], m2[7], perm_unpackhi_epi128 ); + + m1[8+0] = _mm256_unpacklo_epi32( m2[8+0], m2[8+1] ); + m1[8+1] = _mm256_unpacklo_epi32( m2[8+2], m2[8+3] ); + m1[8+2] = _mm256_unpacklo_epi32( m2[8+4], m2[8+5] ); + m1[8+3] = _mm256_unpacklo_epi32( m2[8+6], m2[8+7] ); + m1[8+4] = _mm256_unpackhi_epi32( m2[8+0], m2[8+1] ); + m1[8+5] = _mm256_unpackhi_epi32( m2[8+2], m2[8+3] ); + m1[8+6] = _mm256_unpackhi_epi32( m2[8+4], m2[8+5] ); + m1[8+7] = _mm256_unpackhi_epi32( m2[8+6], m2[8+7] ); + + m2[8+0] = _mm256_unpacklo_epi64( m1[8+0], m1[8+1] ); + m2[8+1] = _mm256_unpackhi_epi64( m1[8+0], m1[8+1] ); + m2[8+2] = _mm256_unpacklo_epi64( m1[8+2], m1[8+3] ); + m2[8+3] = _mm256_unpackhi_epi64( m1[8+2], m1[8+3] ); + m2[8+4] = _mm256_unpacklo_epi64( m1[8+4], m1[8+5] ); + m2[8+5] = _mm256_unpackhi_epi64( m1[8+4], m1[8+5] ); + m2[8+6] = _mm256_unpacklo_epi64( m1[8+6], m1[8+7] ); + m2[8+7] = _mm256_unpackhi_epi64( m1[8+6], m1[8+7] ); + + m1[8+0] 
= _mm256_permute2x128_si256( m2[8+0], m2[8+2], perm_unpacklo_epi128 ); + m1[8+1] = _mm256_permute2x128_si256( m2[8+0], m2[8+2], perm_unpackhi_epi128 ); + m1[8+2] = _mm256_permute2x128_si256( m2[8+1], m2[8+3], perm_unpacklo_epi128 ); + m1[8+3] = _mm256_permute2x128_si256( m2[8+1], m2[8+3], perm_unpackhi_epi128 ); + m1[8+4] = _mm256_permute2x128_si256( m2[8+4], m2[8+6], perm_unpacklo_epi128 ); + m1[8+5] = _mm256_permute2x128_si256( m2[8+4], m2[8+6], perm_unpackhi_epi128 ); + m1[8+6] = _mm256_permute2x128_si256( m2[8+5], m2[8+7], perm_unpacklo_epi128 ); + m1[8+7] = _mm256_permute2x128_si256( m2[8+5], m2[8+7], perm_unpackhi_epi128 ); // horizontal { - // extend to 32bit - for( int i = 0; i < 16; i++ ) - { - m1[i] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m1[i] ) ); - } - m2[ 0] = _mm256_add_epi32( m1[0], m1[ 8] ); m2[ 1] = _mm256_add_epi32( m1[1], m1[ 9] ); m2[ 2] = _mm256_add_epi32( m1[2], m1[10] ); @@ -1978,25 +1734,20 @@ static uint32_t xCalcHAD16x8_AVX2( const Torg *piOrg, const Tcur *piCur, const i } -template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ > -static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) +static uint32_t xCalcHAD8x16_AVX2( const Pel* piOrg, const Pel* piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { uint32_t sad = 0; #ifdef USE_AVX2 - // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 ); - //const int iLoops = 1; __m256i m1[16], m2[16]; - __m256i vzero = _mm256_setzero_si256(); - //for( int l = 0; l < iLoops; l++ ) { { for( int k = 0; k < 16; k++ ) { - __m256i r0 = (sizeof( Torg ) > 1) ? ( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piOrg ) ) ): (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)piOrg ) ), vzero )); - __m256i r1 = (sizeof( Tcur ) > 1) ? 
( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piCur ) ) ): (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)piCur ) ), vzero )); - m1[k] = _mm256_sub_epi16( r0, r1 ); + __m256i r0 = _mm256_cvtepi16_epi32( _mm_lddqu_si128( (__m128i*)piOrg ) ); + __m256i r1 = _mm256_cvtepi16_epi32( _mm_lddqu_si128( (__m128i*)piCur ) ); + m1[k] = _mm256_sub_epi32( r0, r1 ); piCur += iStrideCur; piOrg += iStrideOrg; } @@ -2004,261 +1755,198 @@ static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const i // vertical - m2[ 0] = _mm256_add_epi16( m1[0], m1[ 8] ); - m2[ 1] = _mm256_add_epi16( m1[1], m1[ 9] ); - m2[ 2] = _mm256_add_epi16( m1[2], m1[10] ); - m2[ 3] = _mm256_add_epi16( m1[3], m1[11] ); - m2[ 4] = _mm256_add_epi16( m1[4], m1[12] ); - m2[ 5] = _mm256_add_epi16( m1[5], m1[13] ); - m2[ 6] = _mm256_add_epi16( m1[6], m1[14] ); - m2[ 7] = _mm256_add_epi16( m1[7], m1[15] ); - m2[ 8] = _mm256_sub_epi16( m1[0], m1[ 8] ); - m2[ 9] = _mm256_sub_epi16( m1[1], m1[ 9] ); - m2[10] = _mm256_sub_epi16( m1[2], m1[10] ); - m2[11] = _mm256_sub_epi16( m1[3], m1[11] ); - m2[12] = _mm256_sub_epi16( m1[4], m1[12] ); - m2[13] = _mm256_sub_epi16( m1[5], m1[13] ); - m2[14] = _mm256_sub_epi16( m1[6], m1[14] ); - m2[15] = _mm256_sub_epi16( m1[7], m1[15] ); - - m1[ 0] = _mm256_add_epi16( m2[ 0], m2[ 4] ); - m1[ 1] = _mm256_add_epi16( m2[ 1], m2[ 5] ); - m1[ 2] = _mm256_add_epi16( m2[ 2], m2[ 6] ); - m1[ 3] = _mm256_add_epi16( m2[ 3], m2[ 7] ); - m1[ 4] = _mm256_sub_epi16( m2[ 0], m2[ 4] ); - m1[ 5] = _mm256_sub_epi16( m2[ 1], m2[ 5] ); - m1[ 6] = _mm256_sub_epi16( m2[ 2], m2[ 6] ); - m1[ 7] = _mm256_sub_epi16( m2[ 3], m2[ 7] ); - m1[ 8] = _mm256_add_epi16( m2[ 8], m2[12] ); - m1[ 9] = _mm256_add_epi16( m2[ 9], m2[13] ); - m1[10] = _mm256_add_epi16( m2[10], m2[14] ); - m1[11] = _mm256_add_epi16( m2[11], m2[15] ); - m1[12] = _mm256_sub_epi16( m2[ 8], m2[12] ); - m1[13] = _mm256_sub_epi16( m2[ 9], m2[13] ); - m1[14] = _mm256_sub_epi16( m2[10], m2[14] ); - 
m1[15] = _mm256_sub_epi16( m2[11], m2[15] ); - - m2[ 0] = _mm256_add_epi16( m1[ 0], m1[ 2] ); - m2[ 1] = _mm256_add_epi16( m1[ 1], m1[ 3] ); - m2[ 2] = _mm256_sub_epi16( m1[ 0], m1[ 2] ); - m2[ 3] = _mm256_sub_epi16( m1[ 1], m1[ 3] ); - m2[ 4] = _mm256_add_epi16( m1[ 4], m1[ 6] ); - m2[ 5] = _mm256_add_epi16( m1[ 5], m1[ 7] ); - m2[ 6] = _mm256_sub_epi16( m1[ 4], m1[ 6] ); - m2[ 7] = _mm256_sub_epi16( m1[ 5], m1[ 7] ); - m2[ 8] = _mm256_add_epi16( m1[ 8], m1[10] ); - m2[ 9] = _mm256_add_epi16( m1[ 9], m1[11] ); - m2[10] = _mm256_sub_epi16( m1[ 8], m1[10] ); - m2[11] = _mm256_sub_epi16( m1[ 9], m1[11] ); - m2[12] = _mm256_add_epi16( m1[12], m1[14] ); - m2[13] = _mm256_add_epi16( m1[13], m1[15] ); - m2[14] = _mm256_sub_epi16( m1[12], m1[14] ); - m2[15] = _mm256_sub_epi16( m1[13], m1[15] ); - - m1[ 0] = _mm256_add_epi16( m2[ 0], m2[ 1] ); - m1[ 1] = _mm256_sub_epi16( m2[ 0], m2[ 1] ); - m1[ 2] = _mm256_add_epi16( m2[ 2], m2[ 3] ); - m1[ 3] = _mm256_sub_epi16( m2[ 2], m2[ 3] ); - m1[ 4] = _mm256_add_epi16( m2[ 4], m2[ 5] ); - m1[ 5] = _mm256_sub_epi16( m2[ 4], m2[ 5] ); - m1[ 6] = _mm256_add_epi16( m2[ 6], m2[ 7] ); - m1[ 7] = _mm256_sub_epi16( m2[ 6], m2[ 7] ); - m1[ 8] = _mm256_add_epi16( m2[ 8], m2[ 9] ); - m1[ 9] = _mm256_sub_epi16( m2[ 8], m2[ 9] ); - m1[10] = _mm256_add_epi16( m2[10], m2[11] ); - m1[11] = _mm256_sub_epi16( m2[10], m2[11] ); - m1[12] = _mm256_add_epi16( m2[12], m2[13] ); - m1[13] = _mm256_sub_epi16( m2[12], m2[13] ); - m1[14] = _mm256_add_epi16( m2[14], m2[15] ); - m1[15] = _mm256_sub_epi16( m2[14], m2[15] ); - + m2[ 0] = _mm256_add_epi32( m1[0], m1[ 8] ); + m2[ 1] = _mm256_add_epi32( m1[1], m1[ 9] ); + m2[ 2] = _mm256_add_epi32( m1[2], m1[10] ); + m2[ 3] = _mm256_add_epi32( m1[3], m1[11] ); + m2[ 4] = _mm256_add_epi32( m1[4], m1[12] ); + m2[ 5] = _mm256_add_epi32( m1[5], m1[13] ); + m2[ 6] = _mm256_add_epi32( m1[6], m1[14] ); + m2[ 7] = _mm256_add_epi32( m1[7], m1[15] ); + m2[ 8] = _mm256_sub_epi32( m1[0], m1[ 8] ); + m2[ 9] = _mm256_sub_epi32( 
m1[1], m1[ 9] ); + m2[10] = _mm256_sub_epi32( m1[2], m1[10] ); + m2[11] = _mm256_sub_epi32( m1[3], m1[11] ); + m2[12] = _mm256_sub_epi32( m1[4], m1[12] ); + m2[13] = _mm256_sub_epi32( m1[5], m1[13] ); + m2[14] = _mm256_sub_epi32( m1[6], m1[14] ); + m2[15] = _mm256_sub_epi32( m1[7], m1[15] ); + + m1[ 0] = _mm256_add_epi32( m2[ 0], m2[ 4] ); + m1[ 1] = _mm256_add_epi32( m2[ 1], m2[ 5] ); + m1[ 2] = _mm256_add_epi32( m2[ 2], m2[ 6] ); + m1[ 3] = _mm256_add_epi32( m2[ 3], m2[ 7] ); + m1[ 4] = _mm256_sub_epi32( m2[ 0], m2[ 4] ); + m1[ 5] = _mm256_sub_epi32( m2[ 1], m2[ 5] ); + m1[ 6] = _mm256_sub_epi32( m2[ 2], m2[ 6] ); + m1[ 7] = _mm256_sub_epi32( m2[ 3], m2[ 7] ); + m1[ 8] = _mm256_add_epi32( m2[ 8], m2[12] ); + m1[ 9] = _mm256_add_epi32( m2[ 9], m2[13] ); + m1[10] = _mm256_add_epi32( m2[10], m2[14] ); + m1[11] = _mm256_add_epi32( m2[11], m2[15] ); + m1[12] = _mm256_sub_epi32( m2[ 8], m2[12] ); + m1[13] = _mm256_sub_epi32( m2[ 9], m2[13] ); + m1[14] = _mm256_sub_epi32( m2[10], m2[14] ); + m1[15] = _mm256_sub_epi32( m2[11], m2[15] ); + + m2[ 0] = _mm256_add_epi32( m1[ 0], m1[ 2] ); + m2[ 1] = _mm256_add_epi32( m1[ 1], m1[ 3] ); + m2[ 2] = _mm256_sub_epi32( m1[ 0], m1[ 2] ); + m2[ 3] = _mm256_sub_epi32( m1[ 1], m1[ 3] ); + m2[ 4] = _mm256_add_epi32( m1[ 4], m1[ 6] ); + m2[ 5] = _mm256_add_epi32( m1[ 5], m1[ 7] ); + m2[ 6] = _mm256_sub_epi32( m1[ 4], m1[ 6] ); + m2[ 7] = _mm256_sub_epi32( m1[ 5], m1[ 7] ); + m2[ 8] = _mm256_add_epi32( m1[ 8], m1[10] ); + m2[ 9] = _mm256_add_epi32( m1[ 9], m1[11] ); + m2[10] = _mm256_sub_epi32( m1[ 8], m1[10] ); + m2[11] = _mm256_sub_epi32( m1[ 9], m1[11] ); + m2[12] = _mm256_add_epi32( m1[12], m1[14] ); + m2[13] = _mm256_add_epi32( m1[13], m1[15] ); + m2[14] = _mm256_sub_epi32( m1[12], m1[14] ); + m2[15] = _mm256_sub_epi32( m1[13], m1[15] ); + + m1[ 0] = _mm256_add_epi32( m2[ 0], m2[ 1] ); + m1[ 1] = _mm256_sub_epi32( m2[ 0], m2[ 1] ); + m1[ 2] = _mm256_add_epi32( m2[ 2], m2[ 3] ); + m1[ 3] = _mm256_sub_epi32( m2[ 2], m2[ 3] ); + m1[ 4] 
= _mm256_add_epi32( m2[ 4], m2[ 5] ); + m1[ 5] = _mm256_sub_epi32( m2[ 4], m2[ 5] ); + m1[ 6] = _mm256_add_epi32( m2[ 6], m2[ 7] ); + m1[ 7] = _mm256_sub_epi32( m2[ 6], m2[ 7] ); + m1[ 8] = _mm256_add_epi32( m2[ 8], m2[ 9] ); + m1[ 9] = _mm256_sub_epi32( m2[ 8], m2[ 9] ); + m1[10] = _mm256_add_epi32( m2[10], m2[11] ); + m1[11] = _mm256_sub_epi32( m2[10], m2[11] ); + m1[12] = _mm256_add_epi32( m2[12], m2[13] ); + m1[13] = _mm256_sub_epi32( m2[12], m2[13] ); + m1[14] = _mm256_add_epi32( m2[14], m2[15] ); + m1[15] = _mm256_sub_epi32( m2[14], m2[15] ); // transpose - // 1. 8x8 - m2[0] = _mm256_unpacklo_epi16( m1[0], m1[1] ); - m2[1] = _mm256_unpacklo_epi16( m1[2], m1[3] ); - m2[2] = _mm256_unpacklo_epi16( m1[4], m1[5] ); - m2[3] = _mm256_unpacklo_epi16( m1[6], m1[7] ); - m2[4] = _mm256_unpackhi_epi16( m1[0], m1[1] ); - m2[5] = _mm256_unpackhi_epi16( m1[2], m1[3] ); - m2[6] = _mm256_unpackhi_epi16( m1[4], m1[5] ); - m2[7] = _mm256_unpackhi_epi16( m1[6], m1[7] ); + constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 ); + constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 ); - m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] ); - m1[1] = _mm256_unpackhi_epi32( m2[0], m2[1] ); - m1[2] = _mm256_unpacklo_epi32( m2[2], m2[3] ); - m1[3] = _mm256_unpackhi_epi32( m2[2], m2[3] ); - m1[4] = _mm256_unpacklo_epi32( m2[4], m2[5] ); - m1[5] = _mm256_unpackhi_epi32( m2[4], m2[5] ); - m1[6] = _mm256_unpacklo_epi32( m2[6], m2[7] ); - m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] ); + // 1. 
8x8 + m2[0] = _mm256_unpacklo_epi32( m1[0], m1[1] ); + m2[1] = _mm256_unpacklo_epi32( m1[2], m1[3] ); + m2[2] = _mm256_unpacklo_epi32( m1[4], m1[5] ); + m2[3] = _mm256_unpacklo_epi32( m1[6], m1[7] ); + m2[4] = _mm256_unpackhi_epi32( m1[0], m1[1] ); + m2[5] = _mm256_unpackhi_epi32( m1[2], m1[3] ); + m2[6] = _mm256_unpackhi_epi32( m1[4], m1[5] ); + m2[7] = _mm256_unpackhi_epi32( m1[6], m1[7] ); - m2[0] = _mm256_unpacklo_epi64( m1[0], m1[2] ); - m2[1] = _mm256_unpackhi_epi64( m1[0], m1[2] ); - m2[2] = _mm256_unpacklo_epi64( m1[1], m1[3] ); - m2[3] = _mm256_unpackhi_epi64( m1[1], m1[3] ); - m2[4] = _mm256_unpacklo_epi64( m1[4], m1[6] ); - m2[5] = _mm256_unpackhi_epi64( m1[4], m1[6] ); - m2[6] = _mm256_unpacklo_epi64( m1[5], m1[7] ); - m2[7] = _mm256_unpackhi_epi64( m1[5], m1[7] ); + m1[0] = _mm256_unpacklo_epi64( m2[0], m2[1] ); + m1[1] = _mm256_unpackhi_epi64( m2[0], m2[1] ); + m1[2] = _mm256_unpacklo_epi64( m2[2], m2[3] ); + m1[3] = _mm256_unpackhi_epi64( m2[2], m2[3] ); + m1[4] = _mm256_unpacklo_epi64( m2[4], m2[5] ); + m1[5] = _mm256_unpackhi_epi64( m2[4], m2[5] ); + m1[6] = _mm256_unpacklo_epi64( m2[6], m2[7] ); + m1[7] = _mm256_unpackhi_epi64( m2[6], m2[7] ); + + m2[0] = _mm256_permute2x128_si256( m1[0], m1[2], perm_unpacklo_epi128 ); + m2[1] = _mm256_permute2x128_si256( m1[0], m1[2], perm_unpackhi_epi128 ); + m2[2] = _mm256_permute2x128_si256( m1[1], m1[3], perm_unpacklo_epi128 ); + m2[3] = _mm256_permute2x128_si256( m1[1], m1[3], perm_unpackhi_epi128 ); + m2[4] = _mm256_permute2x128_si256( m1[4], m1[6], perm_unpacklo_epi128 ); + m2[5] = _mm256_permute2x128_si256( m1[4], m1[6], perm_unpackhi_epi128 ); + m2[6] = _mm256_permute2x128_si256( m1[5], m1[7], perm_unpacklo_epi128 ); + m2[7] = _mm256_permute2x128_si256( m1[5], m1[7], perm_unpackhi_epi128 ); // 2. 
8x8 - m2[0+8] = _mm256_unpacklo_epi16( m1[0+8], m1[1+8] ); - m2[1+8] = _mm256_unpacklo_epi16( m1[2+8], m1[3+8] ); - m2[2+8] = _mm256_unpacklo_epi16( m1[4+8], m1[5+8] ); - m2[3+8] = _mm256_unpacklo_epi16( m1[6+8], m1[7+8] ); - m2[4+8] = _mm256_unpackhi_epi16( m1[0+8], m1[1+8] ); - m2[5+8] = _mm256_unpackhi_epi16( m1[2+8], m1[3+8] ); - m2[6+8] = _mm256_unpackhi_epi16( m1[4+8], m1[5+8] ); - m2[7+8] = _mm256_unpackhi_epi16( m1[6+8], m1[7+8] ); - - m1[0+8] = _mm256_unpacklo_epi32( m2[0+8], m2[1+8] ); - m1[1+8] = _mm256_unpackhi_epi32( m2[0+8], m2[1+8] ); - m1[2+8] = _mm256_unpacklo_epi32( m2[2+8], m2[3+8] ); - m1[3+8] = _mm256_unpackhi_epi32( m2[2+8], m2[3+8] ); - m1[4+8] = _mm256_unpacklo_epi32( m2[4+8], m2[5+8] ); - m1[5+8] = _mm256_unpackhi_epi32( m2[4+8], m2[5+8] ); - m1[6+8] = _mm256_unpacklo_epi32( m2[6+8], m2[7+8] ); - m1[7+8] = _mm256_unpackhi_epi32( m2[6+8], m2[7+8] ); - - m2[0+8] = _mm256_unpacklo_epi64( m1[0+8], m1[2+8] ); - m2[1+8] = _mm256_unpackhi_epi64( m1[0+8], m1[2+8] ); - m2[2+8] = _mm256_unpacklo_epi64( m1[1+8], m1[3+8] ); - m2[3+8] = _mm256_unpackhi_epi64( m1[1+8], m1[3+8] ); - m2[4+8] = _mm256_unpacklo_epi64( m1[4+8], m1[6+8] ); - m2[5+8] = _mm256_unpackhi_epi64( m1[4+8], m1[6+8] ); - m2[6+8] = _mm256_unpacklo_epi64( m1[5+8], m1[7+8] ); - m2[7+8] = _mm256_unpackhi_epi64( m1[5+8], m1[7+8] ); - + m2[0+8] = _mm256_unpacklo_epi32( m1[0+8], m1[1+8] ); + m2[1+8] = _mm256_unpacklo_epi32( m1[2+8], m1[3+8] ); + m2[2+8] = _mm256_unpacklo_epi32( m1[4+8], m1[5+8] ); + m2[3+8] = _mm256_unpacklo_epi32( m1[6+8], m1[7+8] ); + m2[4+8] = _mm256_unpackhi_epi32( m1[0+8], m1[1+8] ); + m2[5+8] = _mm256_unpackhi_epi32( m1[2+8], m1[3+8] ); + m2[6+8] = _mm256_unpackhi_epi32( m1[4+8], m1[5+8] ); + m2[7+8] = _mm256_unpackhi_epi32( m1[6+8], m1[7+8] ); + + m1[0+8] = _mm256_unpacklo_epi64( m2[0+8], m2[1+8] ); + m1[1+8] = _mm256_unpackhi_epi64( m2[0+8], m2[1+8] ); + m1[2+8] = _mm256_unpacklo_epi64( m2[2+8], m2[3+8] ); + m1[3+8] = _mm256_unpackhi_epi64( m2[2+8], m2[3+8] ); + 
m1[4+8] = _mm256_unpacklo_epi64( m2[4+8], m2[5+8] ); + m1[5+8] = _mm256_unpackhi_epi64( m2[4+8], m2[5+8] ); + m1[6+8] = _mm256_unpacklo_epi64( m2[6+8], m2[7+8] ); + m1[7+8] = _mm256_unpackhi_epi64( m2[6+8], m2[7+8] ); + + m2[0+8] = _mm256_permute2x128_si256( m1[0+8], m1[2+8], perm_unpacklo_epi128 ); + m2[1+8] = _mm256_permute2x128_si256( m1[0+8], m1[2+8], perm_unpackhi_epi128 ); + m2[2+8] = _mm256_permute2x128_si256( m1[1+8], m1[3+8], perm_unpacklo_epi128 ); + m2[3+8] = _mm256_permute2x128_si256( m1[1+8], m1[3+8], perm_unpackhi_epi128 ); + m2[4+8] = _mm256_permute2x128_si256( m1[4+8], m1[6+8], perm_unpacklo_epi128 ); + m2[5+8] = _mm256_permute2x128_si256( m1[4+8], m1[6+8], perm_unpackhi_epi128 ); + m2[6+8] = _mm256_permute2x128_si256( m1[5+8], m1[7+8], perm_unpacklo_epi128 ); + m2[7+8] = _mm256_permute2x128_si256( m1[5+8], m1[7+8], perm_unpackhi_epi128 ); // horizontal - if( iBitDepth >= 10 ) - { - // extend to 32bit - //for( int j = 0; j < 16; j+=8 ) - { - for( int i = 0; i < 8; i++ ) - { - m2[i] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[i] ) ); - } - - m1[0] = _mm256_add_epi32( m2[0], m2[4] ); - m1[1] = _mm256_add_epi32( m2[1], m2[5] ); - m1[2] = _mm256_add_epi32( m2[2], m2[6] ); - m1[3] = _mm256_add_epi32( m2[3], m2[7] ); - m1[4] = _mm256_sub_epi32( m2[0], m2[4] ); - m1[5] = _mm256_sub_epi32( m2[1], m2[5] ); - m1[6] = _mm256_sub_epi32( m2[2], m2[6] ); - m1[7] = _mm256_sub_epi32( m2[3], m2[7] ); - - m2[0] = _mm256_add_epi32( m1[0], m1[2] ); - m2[1] = _mm256_add_epi32( m1[1], m1[3] ); - m2[2] = _mm256_sub_epi32( m1[0], m1[2] ); - m2[3] = _mm256_sub_epi32( m1[1], m1[3] ); - m2[4] = _mm256_add_epi32( m1[4], m1[6] ); - m2[5] = _mm256_add_epi32( m1[5], m1[7] ); - m2[6] = _mm256_sub_epi32( m1[4], m1[6] ); - m2[7] = _mm256_sub_epi32( m1[5], m1[7] ); - - m1[0] = _mm256_abs_epi32( _mm256_add_epi32( m2[0], m2[1] ) ); - m1[1] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0], m2[1] ) ); - m1[2] = _mm256_abs_epi32( _mm256_add_epi32( m2[2], m2[3] ) ); - m1[3] = 
_mm256_abs_epi32( _mm256_sub_epi32( m2[2], m2[3] ) ); - m1[4] = _mm256_abs_epi32( _mm256_add_epi32( m2[4], m2[5] ) ); - m1[5] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4], m2[5] ) ); - m1[6] = _mm256_abs_epi32( _mm256_add_epi32( m2[6], m2[7] ) ); - m1[7] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6], m2[7] ) ); - - for( int i = 0; i < 8; i++ ) - { - m2[i+8] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[i+8] ) ); - } + m1[0] = _mm256_add_epi32( m2[0], m2[4] ); + m1[1] = _mm256_add_epi32( m2[1], m2[5] ); + m1[2] = _mm256_add_epi32( m2[2], m2[6] ); + m1[3] = _mm256_add_epi32( m2[3], m2[7] ); + m1[4] = _mm256_sub_epi32( m2[0], m2[4] ); + m1[5] = _mm256_sub_epi32( m2[1], m2[5] ); + m1[6] = _mm256_sub_epi32( m2[2], m2[6] ); + m1[7] = _mm256_sub_epi32( m2[3], m2[7] ); + + m2[0] = _mm256_add_epi32( m1[0], m1[2] ); + m2[1] = _mm256_add_epi32( m1[1], m1[3] ); + m2[2] = _mm256_sub_epi32( m1[0], m1[2] ); + m2[3] = _mm256_sub_epi32( m1[1], m1[3] ); + m2[4] = _mm256_add_epi32( m1[4], m1[6] ); + m2[5] = _mm256_add_epi32( m1[5], m1[7] ); + m2[6] = _mm256_sub_epi32( m1[4], m1[6] ); + m2[7] = _mm256_sub_epi32( m1[5], m1[7] ); + + m1[0] = _mm256_abs_epi32( _mm256_add_epi32( m2[0], m2[1] ) ); + m1[1] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0], m2[1] ) ); + m1[2] = _mm256_abs_epi32( _mm256_add_epi32( m2[2], m2[3] ) ); + m1[3] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2], m2[3] ) ); + m1[4] = _mm256_abs_epi32( _mm256_add_epi32( m2[4], m2[5] ) ); + m1[5] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4], m2[5] ) ); + m1[6] = _mm256_abs_epi32( _mm256_add_epi32( m2[6], m2[7] ) ); + m1[7] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6], m2[7] ) ); + + m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] ); + m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] ); + m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] ); + m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] ); + m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] ); + m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] ); + m1[6 + 8] = 
_mm256_sub_epi32( m2[2 + 8], m2[6 + 8] ); + m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] ); + + m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] ); + m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] ); + m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] ); + m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] ); + m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] ); + m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] ); + m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] ); + m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] ); + + m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) ); + m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) ); + m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) ); + m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) ); + m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) ); + m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) ); + m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) ); + m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) ); - m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] ); - m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] ); - m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] ); - m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] ); - m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] ); - m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] ); - m1[6 + 8] = _mm256_sub_epi32( m2[2 + 8], m2[6 + 8] ); - m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] ); - - m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] ); - m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] ); - m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] ); - m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] ); - m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] ); - m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] ); - m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] ); 
- m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] ); - - m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) ); - m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) ); - m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) ); - m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) ); - m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) ); - m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) ); - m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) ); - m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) ); - } - // sum up - m1[0] = _mm256_add_epi32( m1[0], m1[1] ); - m1[1] = _mm256_add_epi32( m1[2], m1[3] ); - m1[2] = _mm256_add_epi32( m1[4], m1[5] ); - m1[3] = _mm256_add_epi32( m1[6], m1[7] ); - m1[4] = _mm256_add_epi32( m1[8], m1[9] ); - m1[5] = _mm256_add_epi32( m1[10], m1[11] ); - m1[6] = _mm256_add_epi32( m1[12], m1[13] ); - m1[7] = _mm256_add_epi32( m1[14], m1[15] ); - } - else - { - // 16x8 - m2[0] = _mm256_permute2x128_si256( m2[0], m2[0 + 8], 0x20 ); - m2[1] = _mm256_permute2x128_si256( m2[1], m2[1 + 8], 0x20 ); - m2[2] = _mm256_permute2x128_si256( m2[2], m2[2 + 8], 0x20 ); - m2[3] = _mm256_permute2x128_si256( m2[3], m2[3 + 8], 0x20 ); - m2[4] = _mm256_permute2x128_si256( m2[4], m2[4 + 8], 0x20 ); - m2[5] = _mm256_permute2x128_si256( m2[5], m2[5 + 8], 0x20 ); - m2[6] = _mm256_permute2x128_si256( m2[6], m2[6 + 8], 0x20 ); - m2[7] = _mm256_permute2x128_si256( m2[7], m2[7 + 8], 0x20 ); - - m1[0] = _mm256_add_epi16( m2[0], m2[4] ); - m1[1] = _mm256_add_epi16( m2[1], m2[5] ); - m1[2] = _mm256_add_epi16( m2[2], m2[6] ); - m1[3] = _mm256_add_epi16( m2[3], m2[7] ); - m1[4] = _mm256_sub_epi16( m2[0], m2[4] ); - m1[5] = _mm256_sub_epi16( m2[1], m2[5] ); - m1[6] = _mm256_sub_epi16( m2[2], m2[6] ); - m1[7] = _mm256_sub_epi16( m2[3], m2[7] ); - - m2[0] = _mm256_add_epi16( m1[0], m1[2] ); - m2[1] = _mm256_add_epi16( 
m1[1], m1[3] ); - m2[2] = _mm256_sub_epi16( m1[0], m1[2] ); - m2[3] = _mm256_sub_epi16( m1[1], m1[3] ); - m2[4] = _mm256_add_epi16( m1[4], m1[6] ); - m2[5] = _mm256_add_epi16( m1[5], m1[7] ); - m2[6] = _mm256_sub_epi16( m1[4], m1[6] ); - m2[7] = _mm256_sub_epi16( m1[5], m1[7] ); - - m1[0] = _mm256_abs_epi16( _mm256_add_epi16( m2[0], m2[1] ) ); - m1[1] = _mm256_abs_epi16( _mm256_sub_epi16( m2[0], m2[1] ) ); - m1[2] = _mm256_abs_epi16( _mm256_add_epi16( m2[2], m2[3] ) ); - m1[3] = _mm256_abs_epi16( _mm256_sub_epi16( m2[2], m2[3] ) ); - m1[4] = _mm256_abs_epi16( _mm256_add_epi16( m2[4], m2[5] ) ); - m1[5] = _mm256_abs_epi16( _mm256_sub_epi16( m2[4], m2[5] ) ); - m1[6] = _mm256_abs_epi16( _mm256_add_epi16( m2[6], m2[7] ) ); - m1[7] = _mm256_abs_epi16( _mm256_sub_epi16( m2[6], m2[7] ) ); - - __m256i ma1, ma2; - - for( int i = 0; i < 8; i++ ) - { - ma1 = _mm256_unpacklo_epi16( m1[i], vzero ); - ma2 = _mm256_unpackhi_epi16( m1[i], vzero ); - m1[i] = _mm256_add_epi32( ma1, ma2 ); - } - } + // sum up + m1[0] = _mm256_add_epi32( m1[0], m1[1] ); + m1[1] = _mm256_add_epi32( m1[2], m1[3] ); + m1[2] = _mm256_add_epi32( m1[4], m1[5] ); + m1[3] = _mm256_add_epi32( m1[6], m1[7] ); + m1[4] = _mm256_add_epi32( m1[8], m1[9] ); + m1[5] = _mm256_add_epi32( m1[10], m1[11] ); + m1[6] = _mm256_add_epi32( m1[12], m1[13] ); + m1[7] = _mm256_add_epi32( m1[14], m1[15] ); // sum up m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] ); @@ -2286,7 +1974,7 @@ static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const i } -template< typename Torg, typename Tcur, X86_VEXT vext > +template<X86_VEXT vext> Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) { if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight ) @@ -2294,8 +1982,8 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) return RdCost::xGetHADs( rcDtParam ); } - const Torg* piOrg = (const Torg*)rcDtParam.org.buf; - const Tcur* piCur = (const Tcur*)rcDtParam.cur.buf; + const Pel* piOrg = rcDtParam.org.buf; 
+ const Pel* piCur = rcDtParam.cur.buf; const int iRows = rcDtParam.org.height; const int iCols = rcDtParam.org.width; const int iStrideCur = rcDtParam.cur.stride; @@ -2312,9 +2000,9 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) for( x = 0; x < iCols; x += 16 ) { if( vext >= AVX2 ) - uiSum += xCalcHAD16x8_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD16x8_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); else - uiSum += xCalcHAD16x8_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD16x8_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); } piOrg += iStrideOrg * 8; piCur += iStrideCur * 8; @@ -2327,9 +2015,9 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) for( x = 0; x < iCols; x += 8 ) { if( vext >= AVX2 ) - uiSum += xCalcHAD8x16_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD8x16_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); else - uiSum += xCalcHAD8x16_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD8x16_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); } piOrg += iStrideOrg * 16; piCur += iStrideCur * 16; @@ -2341,7 +2029,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) { for( x = 0; x < iCols; x += 8 ) { - uiSum += xCalcHAD8x4_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD8x4_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); } piOrg += iStrideOrg * 4; piCur += iStrideCur * 4; @@ -2367,7 +2055,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) { for( x = 0; x < iCols; x += 16 ) { - uiSum += xCalcHAD16x16_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD16x16_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); } piOrg += 
iOffsetOrg; piCur += iOffsetCur; @@ -2381,7 +2069,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) { for( x = 0; x < iCols; x += 8 ) { - uiSum += xCalcHAD8x8_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); + uiSum += xCalcHAD8x8_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth ); } piOrg += iOffsetOrg; piCur += iOffsetCur; @@ -2410,7 +2098,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) { for( x = 0; x < iCols; x += 2 ) { - uiSum += xCalcHADs2x2( (Torg*)&piOrg[x], (Tcur*)&piCur[x*rcDtParam.step], iStrideOrg, iStrideCur, rcDtParam.step ); + uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*rcDtParam.step], iStrideOrg, iStrideCur, rcDtParam.step ); } piOrg += iOffsetOrg; piCur += iOffsetCur; @@ -2421,6 +2109,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) THROW( "Unsupported size" ); } + return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); } @@ -2451,14 +2140,14 @@ void RdCost::_initRdCostX86() m_afpDistortFunc[DF_SAD24 ] = RdCost::xGetSAD_SIMD<vext>; m_afpDistortFunc[DF_SAD48 ] = RdCost::xGetSAD_SIMD<vext>; - m_afpDistortFunc[DF_HAD] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD2] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD4] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD8] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD16] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD32] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD64] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; - m_afpDistortFunc[DF_HAD16N] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>; + m_afpDistortFunc[DF_HAD] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD2] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD4] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD8] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD16] = RdCost::xGetHADs_SIMD<vext>; + 
m_afpDistortFunc[DF_HAD32] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD64] = RdCost::xGetHADs_SIMD<vext>; + m_afpDistortFunc[DF_HAD16N] = RdCost::xGetHADs_SIMD<vext>; m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>; } diff --git a/source/Lib/DecoderLib/AnnexBread.cpp b/source/Lib/DecoderLib/AnnexBread.cpp index 870e2b19381e9efcf5bf8f85664f44818717b85a..7058de923b32d00ecf690cabd1961393eda7a466 100644 --- a/source/Lib/DecoderLib/AnnexBread.cpp +++ b/source/Lib/DecoderLib/AnnexBread.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -79,7 +79,11 @@ _byteStreamNALUnit( { uint8_t leading_zero_8bits = bs.readByte(); #if RExt__DECODER_DEBUG_BIT_STATISTICS +#if EPBINCOUNT_FIX + statBits.bits+=8; +#else statBits.bits+=8; statBits.count++; +#endif #endif if(leading_zero_8bits != 0) { THROW( "Leading zero bits not zero" ); } stats.m_numLeadingZero8BitsBytes++; @@ -97,7 +101,11 @@ _byteStreamNALUnit( { uint8_t zero_byte = bs.readByte(); #if RExt__DECODER_DEBUG_BIT_STATISTICS +#if EPBINCOUNT_FIX + statBits.bits+=8; +#else statBits.bits+=8; statBits.count++; +#endif #endif CHECK( zero_byte != 0, "Zero byte not '0'" ); stats.m_numZeroByteBytes++; @@ -111,7 +119,11 @@ _byteStreamNALUnit( /* NB, (1) guarantees that the next three bytes are 0x00 00 01 */ uint32_t start_code_prefix_one_3bytes = bs.readBytes(24/8); #if RExt__DECODER_DEBUG_BIT_STATISTICS +#if EPBINCOUNT_FIX + statBits.bits+=24; +#else statBits.bits+=24; statBits.count+=3; +#endif #endif if(start_code_prefix_one_3bytes != 0x000001) { THROW( "Invalid code prefix" );} stats.m_numStartCodePrefixBytes += 3; @@ -163,7 +175,11 @@ _byteStreamNALUnit( { uint8_t trailing_zero_8bits = bs.readByte(); #if RExt__DECODER_DEBUG_BIT_STATISTICS 
+#if EPBINCOUNT_FIX + statBits.bits+=8; +#else statBits.bits+=8; statBits.count++; +#endif #endif CHECK( trailing_zero_8bits != 0, "Trailing zero bits not '0'" ); stats.m_numTrailingZero8BitsBytes++; diff --git a/source/Lib/DecoderLib/AnnexBread.h b/source/Lib/DecoderLib/AnnexBread.h index 659c4bc2c8fadbe9e92b0da9474a409f4b8ea6ad..6f9c7334d7133ccddc0c402c11542109d3e12b90 100644 --- a/source/Lib/DecoderLib/AnnexBread.h +++ b/source/Lib/DecoderLib/AnnexBread.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/DecoderLib/BinDecoder.cpp b/source/Lib/DecoderLib/BinDecoder.cpp index 49b3ea2f3a2d91734b2acdda7f3d881c1673c878..81d4783ba41efb3cc7bcf75c1d203ea2c5ee40bc 100644 --- a/source/Lib/DecoderLib/BinDecoder.cpp +++ b/source/Lib/DecoderLib/BinDecoder.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -180,48 +180,33 @@ unsigned BinDecoderBase::decodeBinsEP( unsigned numBins ) return bins; } -unsigned BinDecoderBase::decodeRemAbsEP( unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange ) +unsigned BinDecoderBase::decodeRemAbsEP(unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange) { - unsigned cutoff = COEF_REMAIN_BIN_REDUCTION; unsigned prefix = 0; - useLimitedPrefixLength = true; - if( useLimitedPrefixLength ) { const unsigned maxPrefix = 32 - maxLog2TrDynamicRange; - unsigned codeWord = 0; + unsigned codeWord = 0; do { prefix++; codeWord = decodeBinEP(); - } - while( codeWord && prefix < maxPrefix ); + } while (codeWord && prefix < maxPrefix); prefix -= 1 - codeWord; } - else - { - while( decodeBinEP() ) - { - prefix++; - } - } + unsigned length = goRicePar, offset; - if( prefix < cutoff ) + if (prefix < cutoff) { - offset = prefix << goRicePar; + offset = prefix << goRicePar; } else { - offset = ( ( ( 1 << ( prefix - cutoff ) ) + cutoff - 1 ) << goRicePar ); - if( useLimitedPrefixLength ) + offset = (((1 << (prefix - cutoff)) + cutoff - 1) << goRicePar); { - length += ( prefix == ( 32 - maxLog2TrDynamicRange ) ? maxLog2TrDynamicRange - goRicePar : prefix - COEF_REMAIN_BIN_REDUCTION ); - } - else - { - length += ( prefix - cutoff ); + length += (prefix == (32 - maxLog2TrDynamicRange) ? 
maxLog2TrDynamicRange - goRicePar : prefix - cutoff); } } - return offset + decodeBinsEP( length ); + return offset + decodeBinsEP(length); } @@ -257,15 +242,6 @@ unsigned BinDecoderBase::decodeBinTrm() } -unsigned BinDecoderBase::decodeBinsPCM( unsigned numBins ) -{ - unsigned bins = 0; - m_Bitstream->read( numBins, bins ); -#if RExt__DECODER_DEBUG_BIT_STATISTICS - CodingStatistics::IncrementStatisticEP( STATS__CABAC_PCM_CODE_BITS, numBins, int(bins) ); -#endif - return bins; -} void BinDecoderBase::align() diff --git a/source/Lib/DecoderLib/BinDecoder.h b/source/Lib/DecoderLib/BinDecoder.h index 2e45c0d250b3e27208ffdc275f40f2dd5ead1cd8..11a4260974e322f69032b782d7f64522b93b686d 100644 --- a/source/Lib/DecoderLib/BinDecoder.h +++ b/source/Lib/DecoderLib/BinDecoder.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -71,9 +71,8 @@ public: public: unsigned decodeBinEP (); unsigned decodeBinsEP ( unsigned numBins ); - unsigned decodeRemAbsEP ( unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange ); + unsigned decodeRemAbsEP ( unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange ); unsigned decodeBinTrm (); - unsigned decodeBinsPCM ( unsigned numBins ); void align (); unsigned getNumBitsRead () { return m_Bitstream->getNumBitsRead() + m_bitsNeeded; } private: diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 6a5c179aee44bb4673824c41d683a9ba7a6d3838..17abe59be5eba95409a82b8364c6a4dcf08569d8 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -130,45 +130,38 @@ void CABACReader::remaining_bytes( bool noTrailingBytesExpected ) //================================================================================ // clause 7.3.8.2 //-------------------------------------------------------------------------------- -// bool coding_tree_unit( cs, area, qpL, qpC, ctuRsAddr ) +// void coding_tree_unit( cs, area, qpL, qpC, ctuRsAddr ) //================================================================================ -bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr ) +void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr ) { CUCtx cuCtx( qps[CH_L] ); - Partitioner *partitioner = PartitionerFactory::get( *cs.slice ); + QTBTPartitioner partitioner; - partitioner->initCtu( area, CH_L, *cs.slice ); + partitioner.initCtu(area, CH_L, *cs.slice); + cs.treeType = partitioner.treeType = TREE_D; + cs.modeType = partitioner.modeType = MODE_TYPE_ALL; sao( cs, ctuRsAddr ); - - if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag())) + if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y))) { - CHECK(cs.aps == nullptr, "APS not initialized"); - const AlfSliceParam& alfSliceParam = cs.aps->getAlfAPSParam(); - const PreCalcValues& pcv = *cs.pcv; int frame_width_in_ctus = pcv.widthInCtus; int ry = ctuRsAddr / frame_width_in_ctus; int rx = ctuRsAddr - ry * frame_width_in_ctus; const Position pos( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight ); const uint32_t curSliceIdx = cs.slice->getIndependentSliceIdx(); -#if HEVC_TILES_WPP - const uint32_t curTileIdx = cs.picture->tileMap->getTileIdxMap( pos ); - bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), 
curSliceIdx, curTileIdx, CH_L ) ? true : false; - bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false; -#else - bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, CH_L ) ? true : false; - bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false; -#endif + const uint32_t curTileIdx = cs.pps->getTileIdx( pos ); + bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; + bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; int leftCTUAddr = leftAvail ? ctuRsAddr - 1 : -1; int aboveCTUAddr = aboveAvail ? ctuRsAddr - frame_width_in_ctus : -1; for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { - if( alfSliceParam.enabledFlag[compIdx] ) + if (cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx)) { uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ); int ctx = 0; @@ -177,31 +170,50 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ALF); ctbAlfFlag[ctuRsAddr] = m_BinDecoder.decodeBin( Ctx::ctbAlfFlag( compIdx * 3 + ctx ) ); + + if (isLuma((ComponentID)compIdx) && ctbAlfFlag[ctuRsAddr]) + { + readAlfCtuFilterIndex(cs, ctuRsAddr); + } + if( isChroma( (ComponentID)compIdx ) ) + { + int apsIdx = cs.slice->getTileGroupApsIdChroma(); + CHECK(cs.slice->getAlfAPSs()[apsIdx] == nullptr, "APS not initialized"); + const AlfParam& alfParam = cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam(); + const int numAlts = alfParam.numAlternativesChroma; + uint8_t* ctbAlfAlternative = cs.slice->getPic()->getAlfCtuAlternativeData( compIdx ); + ctbAlfAlternative[ctuRsAddr] = 0; + if( ctbAlfFlag[ctuRsAddr] ) + { + uint8_t decoded = 0; + while( 
decoded < numAlts-1 && m_BinDecoder.decodeBin( Ctx::ctbAlfAlternative( compIdx-1 ) ) ) + ++ decoded; + ctbAlfAlternative[ctuRsAddr] = decoded; + } + } } } } - bool isLast = false; if ( CS::isDualITree(cs) && cs.pcv->chrFormat != CHROMA_400 && cs.pcv->maxCUWidth > 64 ) { - Partitioner *chromaPartitioner = PartitionerFactory::get(*cs.slice); - chromaPartitioner->initCtu(area, CH_C, *cs.slice); + QTBTPartitioner chromaPartitioner; + chromaPartitioner.initCtu(area, CH_C, *cs.slice); CUCtx cuCtxChroma(qps[CH_C]); - isLast = coding_tree(cs, *partitioner, cuCtx, chromaPartitioner, &cuCtxChroma); + coding_tree(cs, partitioner, cuCtx, &chromaPartitioner, &cuCtxChroma); qps[CH_L] = cuCtx.qp; qps[CH_C] = cuCtxChroma.qp; - delete chromaPartitioner; } else { - isLast = coding_tree(cs, *partitioner, cuCtx); + coding_tree(cs, partitioner, cuCtx); qps[CH_L] = cuCtx.qp; - if( !isLast && CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 ) + if( CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 ) { CUCtx cuCtxChroma( qps[CH_C] ); - partitioner->initCtu( area, CH_C, *cs.slice ); - isLast = coding_tree( cs, *partitioner, cuCtxChroma ); + partitioner.initCtu(area, CH_C, *cs.slice); + coding_tree(cs, partitioner, cuCtxChroma); qps[CH_C] = cuCtxChroma.qp; } } @@ -209,10 +221,36 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i DTRACE_COND( ctuRsAddr == 0, g_trace_ctx, D_QP_PER_CTU, "\n%4d %2d", cs.picture->poc, cs.slice->getSliceQpBase() ); DTRACE ( g_trace_ctx, D_QP_PER_CTU, " %3d", qps[CH_L] - cs.slice->getSliceQpBase() ); - delete partitioner; - return isLast; } +void CABACReader::readAlfCtuFilterIndex(CodingStructure& cs, unsigned ctuRsAddr) +{ + short* alfCtbFilterSetIndex = cs.slice->getPic()->getAlfCtbFilterIndex(); + unsigned numAps = cs.slice->getTileGroupNumAps(); + unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS; + uint32_t filtIndex = 0; + if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS) + { + unsigned usePrevFilt 
= m_BinDecoder.decodeBin(Ctx::AlfUseTemporalFilt()); + if (usePrevFilt) + { + if (numAps > 1) + { + xReadTruncBinCode(filtIndex, numAvailableFiltSets - NUM_FIXED_FILTER_SETS); + } + filtIndex += (unsigned)(NUM_FIXED_FILTER_SETS); + } + else + { + xReadTruncBinCode(filtIndex, NUM_FIXED_FILTER_SETS); + } + } + else + { + xReadTruncBinCode(filtIndex, NUM_FIXED_FILTER_SETS); + } + alfCtbFilterSetIndex[ctuRsAddr] = filtIndex; +} //================================================================================ // clause 7.3.8.3 //-------------------------------------------------------------------------------- @@ -250,22 +288,14 @@ void CABACReader::sao( CodingStructure& cs, unsigned ctuRsAddr ) RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SAO ); -#if HEVC_TILES_WPP - const unsigned curTileIdx = cs.picture->tileMap->getTileIdxMap( pos ); - if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), curSliceIdx, curTileIdx, CH_L ) ) -#else - if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), curSliceIdx, CH_L ) ) -#endif + const unsigned curTileIdx = cs.pps->getTileIdx( pos ); + if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), pos, curSliceIdx, curTileIdx, CH_L ) ) { // sao_merge_left_flag sao_merge_type += int( m_BinDecoder.decodeBin( Ctx::SaoMergeFlag() ) ); } -#if HEVC_TILES_WPP - if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), curSliceIdx, curTileIdx, CH_L ) ) -#else - if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), curSliceIdx, CH_L ) ) -#endif + if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), pos, curSliceIdx, curTileIdx, CH_L ) ) { // sao_merge_above_flag sao_merge_type += int( m_BinDecoder.decodeBin( Ctx::SaoMergeFlag() ) ) << 1; @@ -373,19 +403,19 @@ void CABACReader::sao( CodingStructure& cs, unsigned ctuRsAddr ) 
//================================================================================ // clause 7.3.8.4 //-------------------------------------------------------------------------------- -// bool coding_tree ( cs, partitioner, cuCtx ) +// void coding_tree ( cs, partitioner, cuCtx ) // bool split_cu_flag ( cs, partitioner ) // split split_cu_mode_mt ( cs, partitioner ) //================================================================================ -bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, Partitioner* pPartitionerChroma, CUCtx* pCuCtxChroma) +void CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, Partitioner* pPartitionerChroma, CUCtx* pCuCtxChroma) { const PPS &pps = *cs.pps; const UnitArea &currArea = partitioner.currArea(); - bool lastSegment = false; // Reset delta QP coding flag and ChromaQPAdjustemt coding flag - if( pps.getUseDQP() && partitioner.currQgEnable() ) + //Note: do not reset qg at chroma CU + if( pps.getUseDQP() && partitioner.currQgEnable() && !isChroma(partitioner.chType) ) { cuCtx.qgStart = true; cuCtx.isDQPCoded = false; @@ -408,7 +438,6 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU pCuCtxChroma->isChromaQpAdjCoded = false; } } - int startShareThisLevel = 0; const PartSplit splitMode = split_cu_mode( cs, partitioner ); @@ -416,31 +445,6 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU if( splitMode != CU_DONT_SPLIT ) { - const PartSplit split = splitMode; - int splitRatio = 1; - CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT - || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type"); - splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 
1 : 2; - - bool isOneChildSmall = (((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) >> splitRatio) < MRG_SHARELIST_SHARSIZE; - - if ((((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) > (MRG_SHARELIST_SHARSIZE * 1))) - { - shareStateDec = NO_SHARE; - } - - if (shareStateDec == NO_SHARE)//init state - { - if (isOneChildSmall) - { - shareStateDec = SHARING;//share start state - startShareThisLevel = 1; - - shareParentPos = partitioner.currArea().lumaPos(); - shareParentSize.width = partitioner.currArea().lwidth(); - shareParentSize.height = partitioner.currArea().lheight(); - } - } if (CS::isDualITree(cs) && pPartitionerChroma != nullptr && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64)) { partitioner.splitCurrArea(CU_QUAD_SPLIT, cs); @@ -448,15 +452,14 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU bool beContinue = true; bool lumaContinue = true; bool chromaContinue = true; - bool lastSegmentC = false; while (beContinue) { if (partitioner.currArea().lwidth() > 64 || partitioner.currArea().lheight() > 64) { - if (!lastSegmentC && cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos())) + if (cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos())) { - lastSegmentC = coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma); + coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma); } lumaContinue = partitioner.nextPart(cs); chromaContinue = pPartitionerChroma->nextPart(cs); @@ -466,18 +469,17 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU else { //dual tree coding under 64x64 block - if (!lastSegment && cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos())) + if 
(cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos())) { - lastSegment = coding_tree(cs, partitioner, cuCtx); + coding_tree(cs, partitioner, cuCtx); } lumaContinue = partitioner.nextPart(cs); - if (!lastSegmentC && cs.area.blocks[pPartitionerChroma->chType].contains(pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos())) + if (cs.area.blocks[pPartitionerChroma->chType].contains(pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos())) { - lastSegmentC = coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma); + coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma); } chromaContinue = pPartitionerChroma->nextPart(cs); CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched"); - CHECK(lastSegment == true, "luma should not be the last segment"); beContinue = lumaContinue; } } @@ -516,33 +518,57 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU CodingUnit* chromaFirstCu = cs.getCU(pPartitionerChroma->currArea().chromaPos(), CHANNEL_TYPE_CHROMA); tempLastLumaCu->next = chromaFirstCu; - lastSegment = lastSegmentC; } else { + const ModeType modeTypeParent = partitioner.modeType; + cs.modeType = partitioner.modeType = mode_constraint( cs, partitioner, splitMode ); //change for child nodes + //decide chroma split or not + bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && partitioner.modeType == MODE_TYPE_INTRA; + CHECK( chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" ); + if( partitioner.treeType == TREE_D ) + { + cs.treeType = partitioner.treeType = chromaNotSplit ? 
TREE_L : TREE_D; + } partitioner.splitCurrArea( splitMode, cs ); do { - if( !lastSegment && cs.area.blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) ) + if( cs.area.blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) ) { - lastSegment = coding_tree( cs, partitioner, cuCtx ); + coding_tree( cs, partitioner, cuCtx ); } } while( partitioner.nextPart( cs ) ); partitioner.exitCurrSplit(); + if( chromaNotSplit ) + { + CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status" ); + partitioner.chType = CHANNEL_TYPE_CHROMA; + cs.treeType = partitioner.treeType = TREE_C; + + if( cs.picture->blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) ) + { + coding_tree( cs, partitioner, cuCtx ); + } + + //recover treeType + partitioner.chType = CHANNEL_TYPE_LUMA; + cs.treeType = partitioner.treeType = TREE_D; + } + + //recover ModeType + cs.modeType = partitioner.modeType = modeTypeParent; } - if (startShareThisLevel == 1) - shareStateDec = NO_SHARE; - return lastSegment; + return; } CodingUnit& cu = cs.addCU( CS::getArea( cs, currArea, partitioner.chType ), partitioner.chType ); partitioner.setCUData( cu ); cu.slice = cs.slice; -#if HEVC_TILES_WPP - cu.tileIdx = cs.picture->tileMap->getTileIdxMap( currArea.lumaPos() ); -#endif + cu.tileIdx = cs.pps->getTileIdx( currArea.lumaPos() ); + CHECK( cu.cs->treeType != partitioner.treeType, "treeType mismatch" ); + int lumaQPinLocalDualTree = -1; // Predict QP on start of quantization group if( cuCtx.qgStart ) @@ -551,11 +577,15 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU cuCtx.qp = CU::predictQP( cu, cuCtx.qp ); } - if (pps.getUseDQP() && CS::isDualITree(cs) && isChroma(cu.chType)) + if (pps.getUseDQP() && partitioner.isSepTree(cs) && isChroma(cu.chType)) { const Position chromaCentral(cu.chromaPos().offset(cu.chromaSize().width >> 1, cu.chromaSize().height 
>> 1)); const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, cu.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, cu.chromaFormat)); - const CodingUnit* colLumaCu = cs.getCU(lumaRefPos, CHANNEL_TYPE_LUMA); + //derive chroma qp, but the chroma qp is saved in cuCtx.qp which is used for luma qp + //therefore, after decoding the chroma CU, the cuCtx.qp shall be recovered to luma qp in order to decode next luma cu qp + const CodingUnit* colLumaCu = cs.getLumaCU( lumaRefPos ); + CHECK( colLumaCu == nullptr, "colLumaCU shall exist" ); + lumaQPinLocalDualTree = cuCtx.qp; if (colLumaCu) cuCtx.qp = colLumaCu->qp; } @@ -564,15 +594,69 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU cu.chromaQpAdj = cs.chromaQpAdj; //NOTE: CU chroma QP adjustment can be changed by adjustment signaling at TU level // coding unit - cu.shareParentPos = (shareStateDec == SHARING) ? shareParentPos : partitioner.currArea().lumaPos(); - cu.shareParentSize = (shareStateDec == SHARING) ? 
shareParentSize : partitioner.currArea().lumaSize(); - bool isLastCtu = coding_unit( cu, partitioner, cuCtx ); + coding_unit( cu, partitioner, cuCtx ); + //recover cuCtx.qp to luma qp after decoding the chroma CU + if( pps.getUseDQP() && partitioner.isSepTree( cs ) && isChroma( cu.chType ) ) + { + cuCtx.qp = lumaQPinLocalDualTree; + } + uint32_t compBegin; + uint32_t numComp; + bool jointPLT = false; + if (cu.isSepTree()) + { + if (isLuma(partitioner.chType)) + { + compBegin = COMPONENT_Y; + numComp = 1; + } + else + { + compBegin = COMPONENT_Cb; + numComp = 2; + } + } + else + { + compBegin = COMPONENT_Y; + numComp = 3; + jointPLT = true; + } + if (CU::isPLT(cu)) + { + cs.reorderPrevPLT(cs.prevPLT, cu.curPLTSize, cu.curPLT, cu.reuseflag, compBegin, numComp, jointPLT); + } + if( cu.chType == CHANNEL_TYPE_CHROMA ) + { + DTRACE( g_trace_ctx, D_QP, "[chroma CU]x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Cb().x, cu.Cb().y, cu.Cb().width, cu.Cb().height, cu.qp ); + } + else + { DTRACE( g_trace_ctx, D_QP, "x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, cu.qp ); - if (startShareThisLevel == 1) - shareStateDec = NO_SHARE; - return isLastCtu; + } +} + +ModeType CABACReader::mode_constraint( CodingStructure& cs, Partitioner &partitioner, PartSplit splitMode ) +{ + int val = cs.signalModeCons( splitMode, partitioner, partitioner.modeType ); + if( val == LDT_MODE_TYPE_SIGNAL ) + { + int ctxIdx = DeriveCtx::CtxModeConsFlag( cs, partitioner ); + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MODE_CONSTRAINT_FLAG, partitioner.currArea().blocks[partitioner.chType].size(), partitioner.chType ); + bool flag = m_BinDecoder.decodeBin( Ctx::ModeConsFlag( ctxIdx ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "mode_cons_flag() flag=%d\n", flag ); + return flag ? 
MODE_TYPE_INTRA : MODE_TYPE_INTER; + } + else if( val == LDT_MODE_TYPE_INFER ) + { + return MODE_TYPE_INTRA; + } + else + { + return partitioner.modeType; + } } PartSplit CABACReader::split_cu_mode( CodingStructure& cs, Partitioner &partitioner ) @@ -647,32 +731,25 @@ PartSplit CABACReader::split_cu_mode( CodingStructure& cs, Partitioner &partitio //================================================================================ // clause 7.3.8.5 //-------------------------------------------------------------------------------- -// bool coding_unit ( cu, partitioner, cuCtx ) -// void cu_transquant_bypass_flag ( cu ) +// void coding_unit ( cu, partitioner, cuCtx ) // void cu_skip_flag ( cu ) // void pred_mode ( cu ) // void part_mode ( cu ) -// void pcm_flag ( cu ) -// void pcm_samples ( tu ) // void cu_pred_data ( pus ) // void cu_lic_flag ( cu ) // void intra_luma_pred_modes ( pus ) // void intra_chroma_pred_mode ( pu ) // void cu_residual ( cu, partitioner, cuCtx ) // void rqt_root_cbf ( cu ) -// bool end_of_ctu ( cu, cuCtx ) +// void end_of_ctu ( cu, cuCtx ) //================================================================================ -bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx ) +void CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx ) { CodingStructure& cs = *cu.cs; - cs.chType = partitioner.chType; - // transquant bypass flag - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - cu_transquant_bypass_flag( cu ); - } - + CHECK( cu.treeType != partitioner.treeType || cu.modeType != partitioner.modeType, "treeType or modeType mismatch" ); + DTRACE( g_trace_ctx, D_SYNTAX, "coding_unit() treeType=%d modeType=%d\n", cu.treeType, cu.modeType ); + PredictionUnit& pu = cs.addPU(cu, partitioner.chType); // skip flag if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) && cu.Y().valid()) { @@ -682,36 +759,47 @@ bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, 
CUCtx& // skip data if( cu.skip ) { + cu.colorTransform = false; cs.addTU ( cu, partitioner.chType ); - PredictionUnit& pu = cs.addPU( cu, partitioner.chType ); - pu.shareParentPos = cu.shareParentPos; - pu.shareParentSize = cu.shareParentSize; MergeCtx mrgCtx; prediction_unit ( pu, mrgCtx ); - return end_of_ctu( cu, cuCtx ); + end_of_ctu( cu, cuCtx ); + return; } // prediction mode and partitioning data pred_mode ( cu ); - - // --> create PUs - CU::addPUs( cu ); - - // pcm samples - if( CU::isIntra(cu) ) + if (CU::isIntra(cu)) + { + adaptive_color_transform(cu); + } + if (CU::isPLT(cu)) { - pcm_flag( cu, partitioner ); - if( cu.ipcm ) + cu.colorTransform = false; + cs.addTU(cu, partitioner.chType); + if (cu.isSepTree()) { - TransformUnit& tu = cs.addTU( cu, partitioner.chType ); - pcm_samples( tu ); - return end_of_ctu( cu, cuCtx ); + if (isLuma(partitioner.chType)) + { + cu_palette_info(cu, COMPONENT_Y, 1, cuCtx); + } + if (cu.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA)) + { + cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx); + } } + else + { + cu_palette_info(cu, COMPONENT_Y, 3, cuCtx); + } + end_of_ctu(cu, cuCtx); + return; } + bdpcm_mode( cu, ComponentID( partitioner.chType ) ); + if (!CS::isDualITree(*cu.cs) && isLuma(partitioner.chType)) + bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA)); - extend_ref_line( cu ); - - isp_mode( cu ); + // --> create PUs // prediction data ( intra prediction modes / reference indexes + motion vectors ) cu_pred_data( cu ); @@ -720,28 +808,21 @@ bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cu_residual( cu, partitioner, cuCtx ); // check end of cu - return end_of_ctu( cu, cuCtx ); -} - - -void CABACReader::cu_transquant_bypass_flag( CodingUnit& cu ) -{ - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__TQ_BYPASS_FLAG ); - - cu.transQuantBypass = ( m_BinDecoder.decodeBin( Ctx::TransquantBypassFlag() ) ); + end_of_ctu( cu, cuCtx ); } - void 
CABACReader::cu_skip_flag( CodingUnit& cu ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SKIP_FLAG ); - if (cu.slice->isIntra() && cu.cs->slice->getSPS()->getIBCFlag()) + if ((cu.slice->isIntra() || cu.isConsIntra()) && cu.cs->slice->getSPS()->getIBCFlag()) { cu.skip = false; cu.rootCbf = false; cu.predMode = MODE_INTRA; cu.mmvdSkip = false; + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { unsigned ctxId = DeriveCtx::CtxSkipFlag(cu); unsigned skip = m_BinDecoder.decodeBin(Ctx::SkipFlag(ctxId)); DTRACE( g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, skip ? 1 : 0 ); @@ -752,10 +833,17 @@ void CABACReader::cu_skip_flag( CodingUnit& cu ) cu.predMode = MODE_IBC; cu.mmvdSkip = false; } - + } + return; + } + if ( !cu.cs->slice->getSPS()->getIBCFlag() && cu.lwidth() == 4 && cu.lheight() == 4 ) + { + return; + } + if( !cu.cs->slice->getSPS()->getIBCFlag() && cu.isConsIntra() ) + { return; } - unsigned ctxId = DeriveCtx::CtxSkipFlag(cu); unsigned skip = m_BinDecoder.decodeBin( Ctx::SkipFlag(ctxId) ); @@ -763,6 +851,16 @@ void CABACReader::cu_skip_flag( CodingUnit& cu ) if (skip && cu.cs->slice->getSPS()->getIBCFlag()) { + if (cu.lwidth() < 128 && cu.lheight() < 128 && !cu.isConsInter()) // disable IBC mode larger than 64x64 and disable IBC when only allowing inter mode + { + if ( cu.lwidth() == 4 && cu.lheight() == 4 ) + { + cu.skip = true; + cu.rootCbf = false; + cu.predMode = MODE_IBC; + cu.mmvdSkip = false; + return; + } unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx))) { @@ -770,23 +868,22 @@ void CABACReader::cu_skip_flag( CodingUnit& cu ) cu.rootCbf = false; cu.predMode = MODE_IBC; cu.mmvdSkip = false; + cu.firstPU->regularMergeFlag = false; } else { cu.predMode = MODE_INTER; } DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode); + } + else + { + cu.predMode = MODE_INTER; + } } if ((skip && CU::isInter(cu) && 
cu.cs->slice->getSPS()->getIBCFlag()) || (skip && !cu.cs->slice->getSPS()->getIBCFlag())) { -#if JVET_MMVD_OFF_MACRO - cu.mmvdSkip = false; -#else - unsigned mmvdSkip = m_BinDecoder.decodeBin(Ctx::MmvdFlag(0)); - cu.mmvdSkip = mmvdSkip; - DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, mmvdSkip ? 1 : 0); -#endif cu.skip = true; cu.rootCbf = false; cu.predMode = MODE_INTER; @@ -816,21 +913,30 @@ void CABACReader::imv_mode( CodingUnit& cu, MergeCtx& mrgCtx ) const SPS *sps = cu.cs->sps; unsigned value = 0; - unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); if (CU::isIBC(cu)) value = 1; else - value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, ctxId ); + value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 0 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 0 ); + cu.imv = value; if( sps->getAMVREnabledFlag() && value ) { - value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 3 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 3 ); + if (!CU::isIBC(cu)) + { + value = m_BinDecoder.decodeBin(Ctx::ImvFlag(4)); + DTRACE(g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 4); + cu.imv = value ? 
1 : IMV_HPEL; + } + if (value) + { + value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 1 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 1 ); value++; + cu.imv = value; + } } - cu.imv = value; DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() IMVFlag=%d\n", cu.imv ); } @@ -851,13 +957,13 @@ void CABACReader::affine_amvr_mode( CodingUnit& cu, MergeCtx& mrgCtx ) } unsigned value = 0; - value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 4 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 4 ); + value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 2 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 2 ); if( value ) { - value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 5 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 5 ); + value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 3 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 3 ); value++; } @@ -868,60 +974,135 @@ void CABACReader::affine_amvr_mode( CodingUnit& cu, MergeCtx& mrgCtx ) void CABACReader::pred_mode( CodingUnit& cu ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__PRED_MODE ); - - if (cu.cs->slice->getSPS()->getIBCFlag()) + if (cu.cs->slice->getSPS()->getIBCFlag() && cu.chType != CHANNEL_TYPE_CHROMA) { - if (cu.cs->slice->isIntra()) + if( cu.isConsInter() ) + { + cu.predMode = MODE_INTER; + return; + } + + if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() ) { cu.predMode = MODE_INTRA; + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx))) { cu.predMode = MODE_IBC; } + } + if (!CU::isIBC(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0))) + { + cu.predMode = MODE_PLT; + } + } } else { if 
(m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)))) { cu.predMode = MODE_INTRA; + if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0))) + { + cu.predMode = MODE_PLT; + } + } } else { cu.predMode = MODE_INTER; + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx))) { cu.predMode = MODE_IBC; } + } } } } else { - if (cu.cs->slice->isIntra() || m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)))) + if( cu.isConsInter() ) + { + cu.predMode = MODE_INTER; + return; + } + + if ( cu.cs->slice->isIntra() || (cu.lwidth() == 4 && cu.lheight() == 4) || cu.isConsIntra() ) { cu.predMode = MODE_INTRA; + if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0))) + { + cu.predMode = MODE_PLT; + } + } } else { - cu.predMode = MODE_INTER; + cu.predMode = m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))) ? 
MODE_INTRA : MODE_INTER; + if (CU::isIntra(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0))) + { + cu.predMode = MODE_PLT; + } + } } } } - -void CABACReader::pcm_flag( CodingUnit& cu, Partitioner &partitioner ) +void CABACReader::bdpcm_mode( CodingUnit& cu, const ComponentID compID ) { - const SPS& sps = *cu.cs->sps; - if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) - || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) ) + + if (!CU::bdpcmAllowed(cu, compID)) { - cu.ipcm = false; - return; + if (isLuma(compID)) + { + cu.bdpcmMode = 0; + if (!CS::isDualITree(*cu.cs)) + cu.bdpcmModeChroma = 0; + } + else + { + cu.bdpcmModeChroma = 0; + } + return; } - cu.ipcm = ( m_BinDecoder.decodeBinTrm() ); -} + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__BDPCM_MODE, cu.block(compID).lumaSize(), compID ); + + int bdpcmMode; + bdpcmMode = m_BinDecoder.decodeBin(Ctx::BDPCMMode(0)); + if (bdpcmMode) + { + bdpcmMode += m_BinDecoder.decodeBin(Ctx::BDPCMMode(1)); + } + if (isLuma(compID)) + { + cu.bdpcmMode = bdpcmMode; + } + else + { + cu.bdpcmModeChroma = bdpcmMode; + } + if (isLuma(compID)) + { + DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_LUMA, cu.lumaPos().x, cu.lumaPos().y, cu.lwidth(), cu.lheight(), cu.bdpcmMode); + } + else + { + DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_CHROMA, cu.chromaPos().x, cu.chromaPos().y, cu.chromaSize().width, cu.chromaSize().height, cu.bdpcmModeChroma); + } +} void CABACReader::cu_pred_data( CodingUnit &cu ) { @@ -940,59 +1121,53 @@ void CABACReader::cu_pred_data( CodingUnit &cu ) for( auto &pu : CU::traversePUs( cu ) ) { - 
pu.shareParentPos = cu.shareParentPos; - pu.shareParentSize = cu.shareParentSize; prediction_unit( pu, mrgCtx ); } imv_mode ( cu, mrgCtx ); affine_amvr_mode( cu, mrgCtx ); - cu_gbi_flag( cu ); + cu_bcw_flag( cu ); } -void CABACReader::cu_gbi_flag(CodingUnit& cu) +void CABACReader::cu_bcw_flag(CodingUnit& cu) { - if(!CU::isGBiIdxCoded(cu)) + if(!CU::isBcwIdxCoded(cu)) { return; } - CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) "); + CHECK(!(BCW_NUM > 1 && (BCW_NUM == 2 || (BCW_NUM & 0x01) == 1)), " !( BCW_NUM > 1 && ( BCW_NUM == 2 || ( BCW_NUM & 0x01 ) == 1 ) ) "); - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__GBI_IDX); + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__BCW_IDX); uint32_t idx = 0; - uint32_t symbol = m_BinDecoder.decodeBin(Ctx::GBiIdx(0)); - - int32_t numGBi = (cu.slice->getCheckLDC()) ? 5 : 3; + uint32_t symbol = m_BinDecoder.decodeBin(Ctx::BcwIdx(0)); - if(symbol == 0) + int32_t numBcw = (cu.slice->getCheckLDC()) ? 5 : 3; + if(symbol == 1) { - uint32_t prefixNumBits = numGBi - 2; + uint32_t prefixNumBits = numBcw - 2; uint32_t step = 1; - unsigned ctxIdGBi = 4; idx = 1; for(int ui = 0; ui < prefixNumBits; ++ui) { - symbol = m_BinDecoder.decodeBin(Ctx::GBiIdx(ctxIdGBi)); - - if (symbol == 1) + symbol = m_BinDecoder.decodeBinEP(); + if (symbol == 0) { break; } - ctxIdGBi += step; idx += step; } } - uint8_t gbiIdx = (uint8_t)g_GbiParsingOrder[idx]; - CU::setGbiIdx(cu, gbiIdx); + uint8_t bcwIdx = (uint8_t)g_BcwParsingOrder[idx]; + CU::setBcwIdx(cu, bcwIdx); - DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 1 : 0); + DTRACE(g_trace_ctx, D_SYNTAX, "cu_bcw_flag() bcw_idx=%d\n", cu.BcwIdx ? 
1 : 0); } void CABACReader::xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol) @@ -1029,11 +1204,7 @@ void CABACReader::xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol) void CABACReader::extend_ref_line(CodingUnit& cu) { -#if !ENABLE_JVET_L0283_MRL - return; -#endif - - if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.ipcm) + if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode ) { cu.firstPU->multiRefIdx = 0; return; @@ -1045,6 +1216,12 @@ void CABACReader::extend_ref_line(CodingUnit& cu) for (int k = 0; k < numBlocks; k++) { + if( !cu.cs->sps->getUseMRL() ) + { + pu->multiRefIdx = 0; + pu = pu->next; + continue; + } bool isFirstLineOfCtu = (((cu.block(COMPONENT_Y).y)&((cu.cs->sps)->getMaxCUWidth() - 1)) == 0); if (isFirstLineOfCtu) { @@ -1059,10 +1236,6 @@ void CABACReader::extend_ref_line(CodingUnit& cu) if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(1)) == 1 ? MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1]; - if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1]) - { - multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(2)) == 1 ? MULTI_REF_LINE_IDX[3] : MULTI_REF_LINE_IDX[2]; - } } } @@ -1078,6 +1251,21 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) return; } + if( cu.bdpcmMode ) + { + cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? 
VER_IDX : HOR_IDX; + return; + } + + mip_flag(cu); + if (cu.mipFlag) + { + mip_pred_modes(cu); + return; + } + extend_ref_line( cu ); + isp_mode( cu ); + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA ); // prev_intra_luma_pred_flag @@ -1086,7 +1274,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) for( int k = 0; k < numBlocks; k++ ) { CHECK(numBlocks != 1, "not supported yet"); - if( cu.firstPU->multiRefIdx || ( cu.ispMode && isLuma( cu.chType ) ) ) + if ( cu.firstPU->multiRefIdx ) { mpmFlag[0] = true; } @@ -1107,7 +1295,11 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) { uint32_t ipred_idx = 0; { - ipred_idx = m_BinDecoder.decodeBinEP(); + unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0); + if (pu->multiRefIdx == 0) + ipred_idx = m_BinDecoder.decodeBin(Ctx::IntraLumaPlanarFlag(ctx)); + else + ipred_idx = 1; if( ipred_idx ) { ipred_idx += m_BinDecoder.decodeBinEP(); @@ -1152,7 +1344,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) void CABACReader::intra_chroma_pred_modes( CodingUnit& cu ) { - if( cu.chromaFormat == CHROMA_400 || ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_LUMA ) ) + if( cu.chromaFormat == CHROMA_400 || ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA ) ) { return; } @@ -1164,50 +1356,72 @@ void CABACReader::intra_chroma_pred_modes( CodingUnit& cu ) intra_chroma_pred_mode( *pu ); } } - -bool CABACReader::intra_chroma_lmc_mode( PredictionUnit& pu ) +bool CABACReader::intra_chroma_lmc_mode(PredictionUnit& pu) { int lmModeList[10]; - int maxSymbol = PU::getLMSymbolList(pu, lmModeList); - int symbol = unary_max_symbol(Ctx::IntraChromaPredMode(1), Ctx::IntraChromaPredMode(2), maxSymbol - 1); - if (lmModeList[symbol] != -1) + PU::getLMSymbolList(pu, lmModeList); + + int symbol = m_BinDecoder.decodeBin(Ctx::CclmModeIdx(0)); + + if (symbol == 0) { pu.intraDir[1] = lmModeList[symbol]; - return true; + 
CHECK(pu.intraDir[1] != LM_CHROMA_IDX, "should be LM_CHROMA"); } - return false; + else + { + symbol += m_BinDecoder.decodeBinEP(); + pu.intraDir[1] = lmModeList[symbol]; + } + return true; //it will only enter this function for LMC modes, so always return true ; } -void CABACReader::intra_chroma_pred_mode( PredictionUnit& pu ) +void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu) { - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, pu.cu->blocks[pu.chType].lumaSize(), CHANNEL_TYPE_CHROMA ); - - if (m_BinDecoder.decodeBin(Ctx::IntraChromaPredMode(0)) == 0) + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__INTRA_DIR_ANG, pu.cu->blocks[pu.chType].lumaSize(), CHANNEL_TYPE_CHROMA); + if (pu.cu->colorTransform) { - pu.intraDir[1] = DM_CHROMA_IDX; + pu.intraDir[CHANNEL_TYPE_CHROMA] = DM_CHROMA_IDX; return; } // LM chroma mode - if( pu.cs->sps->getUseLMChroma() ) + + if (pu.cu->bdpcmModeChroma) + { + unsigned chromaCandModes[NUM_CHROMA_MODE]; + PU::getIntraChromaCandModes(pu, chromaCandModes); + pu.intraDir[1] = chromaCandModes[0]; + return; + } + + if (pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed()) { - if( intra_chroma_lmc_mode( pu ) ) + bool isLMCMode = m_BinDecoder.decodeBin(Ctx::CclmModeFlag(0)) ? 
true : false; + if (isLMCMode) { + intra_chroma_lmc_mode(pu); return; } } - unsigned candId = m_BinDecoder.decodeBinsEP( 2 ); - unsigned chromaCandModes[ NUM_CHROMA_MODE ]; - PU::getIntraChromaCandModes( pu, chromaCandModes ); + if (m_BinDecoder.decodeBin(Ctx::IntraChromaPredMode(0)) == 0) + { + pu.intraDir[1] = DM_CHROMA_IDX; + return; + } + + unsigned candId = m_BinDecoder.decodeBinsEP(2); - CHECK( candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds" ); - CHECK( PU::isLMCMode( chromaCandModes[ candId ] ), "The intra dir cannot be LM_CHROMA for this path" ); - CHECK( chromaCandModes[ candId ] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path" ); + unsigned chromaCandModes[NUM_CHROMA_MODE]; + PU::getIntraChromaCandModes(pu, chromaCandModes); - pu.intraDir[1] = chromaCandModes[ candId ]; -} + CHECK(candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds"); + CHECK(PU::isLMCMode(chromaCandModes[candId]), "The intra dir cannot be LM_CHROMA for this path"); + CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path"); + pu.intraDir[1] = chromaCandModes[candId]; +} void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& cuCtx ) { if (!CU::isIntra(cu)) @@ -1227,6 +1441,7 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& } if( !cu.rootCbf ) { + cu.colorTransform = false; TransformUnit& tu = cu.cs->addTU(cu, partitioner.chType); tu.depth = 0; for( unsigned c = 0; c < tu.blocks.size(); c++ ) @@ -1240,16 +1455,29 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& } } + if (CU::isInter(cu) || CU::isIBC(cu)) + { + adaptive_color_transform(cu); + } + + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false; + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; + cuCtx.lfnstLastScanPos = false; + cuCtx.violatesMtsCoeffConstraint = false; + ChromaCbfs chromaCbfs; if( cu.ispMode && isLuma( 
partitioner.chType ) ) { TUIntraSubPartitioner subTuPartitioner( partitioner ); - transform_tree( *cu.cs, subTuPartitioner, cuCtx, chromaCbfs, CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType ) ), 0 ); + transform_tree( *cu.cs, subTuPartitioner, cuCtx, CU::getISPType(cu, getFirstComponentOfChannel(partitioner.chType)), 0 ); } else { - transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs ); + transform_tree( *cu.cs, partitioner, cuCtx ); } + + residual_lfnst_mode( cu, cuCtx ); + mts_idx ( cu, cuCtx ); } void CABACReader::rqt_root_cbf( CodingUnit& cu ) @@ -1261,6 +1489,24 @@ void CABACReader::rqt_root_cbf( CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y ); } +void CABACReader::adaptive_color_transform(CodingUnit& cu) +{ + if (!cu.slice->getSPS()->getUseColorTrans()) + { + return; + } + + if (cu.isSepTree()) + { + return; + } + + if (CU::isInter(cu) || CU::isIBC(cu) || CU::isIntra(cu)) + { + cu.colorTransform = (m_BinDecoder.decodeBin(Ctx::ACTFlag())); + } +} + void CABACReader::sbt_mode( CodingUnit& cu ) { const uint8_t sbtAllowed = cu.checkAllowedSbt(); @@ -1318,27 +1564,338 @@ void CABACReader::sbt_mode( CodingUnit& cu ) } -bool CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx ) +void CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx ) { - const SPS &sps = *cu.cs->sps; const Position rbPos = recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].bottomRight().offset( 1, 1 ) ); - if ( ( ( rbPos.x & cu.cs->pcv->maxCUWidthMask ) == 0 || rbPos.x == sps.getPicWidthInLumaSamples () ) - && ( ( rbPos.y & cu.cs->pcv->maxCUHeightMask ) == 0 || rbPos.y == sps.getPicHeightInLumaSamples() ) - && ( !CS::isDualITree( *cu.cs ) || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) ) + if( ( ( rbPos.x & cu.cs->pcv->maxCUWidthMask ) == 0 || rbPos.x == cu.cs->pps->getPicWidthInLumaSamples() ) + && ( ( rbPos.y & 
cu.cs->pcv->maxCUHeightMask ) == 0 || rbPos.y == cu.cs->pps->getPicHeightInLumaSamples() ) + && ( !cu.isSepTree() || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) ) ) { cuCtx.isDQPCoded = ( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded ); - return terminating_bit(); } - - return false; } -//================================================================================ -// clause 7.3.8.6 -//-------------------------------------------------------------------------------- +void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx) +{ + const SPS& sps = *(cu.cs->sps); + TransformUnit& tu = *cu.firstTU; + int curPLTidx = 0; + + cu.lastPLTSize[compBegin] = cu.cs->prevPLT.curPLTSize[compBegin]; + + if (cu.lastPLTSize[compBegin]) + { + xDecodePLTPredIndicator(cu, MAXPLTSIZE, compBegin); + } + + for (int idx = 0; idx < cu.lastPLTSize[compBegin]; idx++) + { + if (cu.reuseflag[compBegin][idx]) + { + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + cu.curPLT[comp][curPLTidx] = cu.cs->prevPLT.curPLT[comp][idx]; + } + curPLTidx++; + } + } + + uint32_t recievedPLTnum = 0; + + if (curPLTidx < MAXPLTSIZE) + { + recievedPLTnum = exp_golomb_eqprob(0); + } + + cu.curPLTSize[compBegin] = curPLTidx + recievedPLTnum; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + for (int idx = curPLTidx; idx < cu.curPLTSize[compBegin]; idx++) + { + ComponentID compID = (ComponentID)comp; + const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); + cu.curPLT[compID][idx] = m_BinDecoder.decodeBinsEP(channelBitDepth); + } + } + cu.useEscape[compBegin] = true; + if (cu.curPLTSize[compBegin] > 0) + { + uint32_t escCode = 0; + escCode = m_BinDecoder.decodeBinEP(); + cu.useEscape[compBegin] = (escCode != 0); + } + uint32_t indexMaxSize = cu.useEscape[compBegin] ? 
(cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; + //encode index map + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; + + uint32_t total = height * width; + if (indexMaxSize > 1) + parseScanRotationModeFlag(cu, compBegin); + else + cu.useRotation[compBegin] = false; + + if (cu.useEscape[compBegin] && cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded) + { + if (!cu.isSepTree() || isLuma(tu.chType)) + { + cu_qp_delta(cu, cuCtx.qp, cu.qp); + cuCtx.qp = cu.qp; + cuCtx.isDQPCoded = true; + } + } + if (cu.useEscape[compBegin] && cu.cs->slice->getUseChromaQpAdj() && !cuCtx.isChromaQpAdjCoded) + { + if (!cu.isSepTree() || isChroma(tu.chType)) + { + cu_chroma_qp_offset(cu); + cuCtx.isChromaQpAdjCoded = true; + } + } + + m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)]; + uint32_t prevRunPos = 0; + unsigned prevRunType = 0; + for (int subSetId = 0; subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE; subSetId++) + { + cuPaletteSubblockInfo(cu, compBegin, numComp, subSetId, prevRunPos, prevRunType); + } +} +void CABACReader::cuPaletteSubblockInfo(CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType) +{ + const SPS& sps = *(cu.cs->sps); + TransformUnit& tu = *cu.firstTU; + PLTtypeBuf runType = tu.getrunType(compBegin); + PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin); + uint32_t indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; + uint32_t totalPel = cu.block(compBegin).height*cu.block(compBegin).width; + + int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE; + int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE); + maxSubPos = (maxSubPos > totalPel) ? 
totalPel : maxSubPos; // if last position is out of the current CU size + + unsigned runCopyFlag[(1 << LOG2_PALETTE_CG_SIZE)]; + for (int i = 0; i < (1 << LOG2_PALETTE_CG_SIZE); i++) + runCopyFlag[i] = MAX_INT; + if (minSubPos == 0) + runCopyFlag[0] = 0; + +// PLT runCopy flag and runType - context coded + int curPos = minSubPos; + for (; curPos < maxSubPos && indexMaxSize > 1; curPos++) + { + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y; + uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x; + unsigned identityFlag = 1; + + const CtxSet& ctxSet = (prevRunType == PLT_RUN_INDEX) ? Ctx::IdxRunModel : Ctx::CopyRunModel; + if (curPos > 0) + { + int dist = curPos - prevRunPos - 1; + const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(prevRunType, dist); + identityFlag = m_BinDecoder.decodeBin( ctxSet( ctxId ) ); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_copy_flag() bin=%d ctx=%d\n", identityFlag, ctxId); + runCopyFlag[curPos - minSubPos] = identityFlag; + } + + if ( identityFlag == 0 || curPos == 0 ) + { + if (((posy == 0) && !cu.useRotation[compBegin]) || ((posx == 0) && cu.useRotation[compBegin])) + { + runType.at(posx, posy) = PLT_RUN_INDEX; + } + else if (curPos != 0 && runType.at(posxprev, posyprev) == PLT_RUN_COPY) + { + runType.at(posx, posy) = PLT_RUN_INDEX; + } + else + { + runType.at(posx, posy) = (m_BinDecoder.decodeBin(Ctx::RunTypeFlag())); + } + DTRACE(g_trace_ctx, D_SYNTAX, "plt_type_flag() bin=%d sp=%d\n", runType.at(posx, posy), curPos); + prevRunType = runType.at(posx, posy); + prevRunPos = curPos; + } + else //assign run information + { + runType.at(posx, posy) = runType.at(posxprev, posyprev); + } + } + +// PLT index values - bypass coded + uint32_t adjust; + uint32_t symbol = 0; + curPos = minSubPos; + if (indexMaxSize > 1) + { + for (; curPos < maxSubPos; curPos++) + { + if (curPos > 0) + adjust = 1; + else + adjust = 0; + + uint32_t posy = 
m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y; + uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x; + if ( runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX ) + { + xReadTruncBinCode(symbol, indexMaxSize - adjust); + xAdjustPLTIndex(cu, symbol, curPos, curPLTIdx, runType, indexMaxSize, compBegin); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_idx_idc() value=%d sp=%d\n", curPLTIdx.at(posx, posy), curPos); + } + else if (runType.at(posx, posy) == PLT_RUN_INDEX) + { + curPLTIdx.at(posx, posy) = curPLTIdx.at(posxprev, posyprev); + } + else + { + curPLTIdx.at(posx, posy) = (cu.useRotation[compBegin]) ? curPLTIdx.at(posx - 1, posy) : curPLTIdx.at(posx, posy - 1); + } + } + } + else + { + for (; curPos < maxSubPos; curPos++) + { + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y; + uint32_t posxprev = (curPos == 0) ? 
0 : m_scanOrder[curPos - 1].x; + runType.at(posx, posy) = PLT_RUN_INDEX; + if (runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX) + { + curPLTIdx.at(posx, posy) = 0; + } + else + { + curPLTIdx.at(posx, posy) = curPLTIdx.at(posxprev, posyprev); + } + } + } + +// Quantized escape colors - bypass coded + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc()); + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + ComponentID compID = (ComponentID)comp; + for (curPos = minSubPos; curPos < maxSubPos; curPos++) + { + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + if (curPLTIdx.at(posx, posy) == cu.curPLTSize[compBegin]) + { + PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)comp); + if (compID == COMPONENT_Y || compBegin != COMPONENT_Y) + { + escapeValue.at(posx, posy) = exp_golomb_eqprob(3); + assert(escapeValue.at(posx, posy) < (1 << (cu.cs->sps->getBitDepth(toChannelType((ComponentID)comp)) + 1))); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos); + } + if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0) + { + uint32_t posxC = posx >> scaleX; + uint32_t posyC = posy >> scaleY; + escapeValue.at(posxC, posyC) = exp_golomb_eqprob(3); + assert(escapeValue.at(posxC, posyC) < (1 << (cu.cs->sps->getBitDepth(toChannelType(compID)) + 1))); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos); + } + } + } + } +} +void CABACReader::parseScanRotationModeFlag(CodingUnit& cu, ComponentID compBegin) +{ + cu.useRotation[compBegin] = m_BinDecoder.decodeBin(Ctx::RotationFlag()); +} +void CABACReader::xDecodePLTPredIndicator(CodingUnit& cu, uint32_t maxPLTSize, ComponentID compBegin) +{ + uint32_t 
symbol, numPltPredicted = 0, idx = 0; + + symbol = exp_golomb_eqprob(0); + + if (symbol != 1) + { + while (idx < cu.lastPLTSize[compBegin] && numPltPredicted < maxPLTSize) + { + if (idx > 0) + { + symbol = exp_golomb_eqprob(0); + } + if (symbol == 1) + { + break; + } + + if (symbol) + { + idx += symbol - 1; + } + cu.reuseflag[compBegin][idx] = 1; + numPltPredicted++; + idx++; + } + } +} +void CABACReader::xAdjustPLTIndex(CodingUnit& cu, Pel curLevel, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin) +{ + uint32_t symbol; + int refLevel = MAX_INT; + uint32_t posy = m_scanOrder[idx].y; + uint32_t posx = m_scanOrder[idx].x; + if (idx) + { + uint32_t prevposy = m_scanOrder[idx - 1].y; + uint32_t prevposx = m_scanOrder[idx - 1].x; + if (paletteRunType.at(prevposx, prevposy) == PLT_RUN_INDEX) + { + refLevel = paletteIdx.at(prevposx, prevposy); + if (paletteIdx.at(prevposx, prevposy) == cu.curPLTSize[compBegin]) // escape + { + refLevel = maxSymbol - 1; + } + } + else + { + if (cu.useRotation[compBegin]) + { + assert(prevposx > 0); + refLevel = paletteIdx.at(posx - 1, posy); + if (paletteIdx.at(posx - 1, posy) == cu.curPLTSize[compBegin]) // escape mode + { + refLevel = maxSymbol - 1; + } + } + else + { + assert(prevposy > 0); + refLevel = paletteIdx.at(posx, posy - 1); + if (paletteIdx.at(posx, posy - 1) == cu.curPLTSize[compBegin]) // escape mode + { + refLevel = maxSymbol - 1; + } + } + } + maxSymbol--; + } + symbol = curLevel; + if (curLevel >= refLevel) // include escape mode + { + symbol++; + } + paletteIdx.at(posx, posy) = symbol; +} + +//================================================================================ +// clause 7.3.8.6 +//-------------------------------------------------------------------------------- // void prediction_unit ( pu, mrgCtx ); // void merge_flag ( pu ); // void merge_data ( pu, mrgCtx ); @@ -1360,27 +1917,7 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) } 
if( pu.mergeFlag ) { - if (CU::isIBC(*pu.cu)) - { - merge_idx(pu); - } - else - { - subblock_merge_flag( *pu.cu ); - MHIntra_flag(pu); - if (pu.mhIntraFlag) - { - MHIntra_luma_pred_modes(*pu.cu); - pu.intraDir[1] = DM_CHROMA_IDX; - } - triangle_mode( *pu.cu ); - if (pu.mmvdMergeFlag) - { - mmvd_merge_idx(pu); - } - else - merge_data ( pu ); - } + merge_data(pu); } else if (CU::isIBC(*pu.cu)) { @@ -1388,6 +1925,11 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) pu.cu->affine = false; pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; mvd_coding(pu.mvd[REF_PIC_LIST_0]); + if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 ) + { + pu.mvpIdx[REF_PIC_LIST_0] = 0; + } + else mvp_flag(pu, REF_PIC_LIST_0); } else @@ -1420,7 +1962,7 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) if ( pu.cu->smvdMode != 1 ) { ref_idx ( pu, REF_PIC_LIST_1 ); - if( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ ) + if( pu.cu->cs->picHeader->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ ) { pu.mvd[ REF_PIC_LIST_1 ] = Mv(); pu.mvdAffi[REF_PIC_LIST_1][0] = Mv(); @@ -1449,13 +1991,14 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx ) pu.mv [REF_PIC_LIST_1] = Mv(0, 0); pu.refIdx[REF_PIC_LIST_1] = -1; pu.interDir = 1; - pu.cu->GBiIdx = GBI_DEFAULT; + pu.cu->BcwIdx = BCW_DEFAULT; } if ( pu.cu->smvdMode ) { RefPicList eCurRefList = (RefPicList)(pu.cu->smvdMode - 1); pu.mvd[1 - eCurRefList].set( -pu.mvd[eCurRefList].hor, -pu.mvd[eCurRefList].ver ); + CHECK(!((pu.mvd[1 - eCurRefList].getHor() >= MVD_MIN) && (pu.mvd[1 - eCurRefList].getHor() <= MVD_MAX)) || !((pu.mvd[1 - eCurRefList].getVer() >= MVD_MIN) && (pu.mvd[1 - eCurRefList].getVer() <= MVD_MAX)), "Illegal MVD value"); pu.refIdx[1 - eCurRefList] = pu.cs->slice->getSymRefIdx( 1 - eCurRefList ); } @@ -1484,17 +2027,14 @@ void CABACReader::smvd_mode( PredictionUnit& pu ) void CABACReader::subblock_merge_flag( CodingUnit& cu ) { - if 
( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) ) - { - return; - } + cu.affine = false; - if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) + if ( !cu.cs->slice->isIntra() && (cu.slice->getPicHeader()->getMaxNumAffineMergeCand() > 0) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__AFFINE_FLAG ); unsigned ctxId = DeriveCtx::CtxAffineFlag( cu ); - cu.affine = m_BinDecoder.decodeBin( Ctx::AffineFlag( ctxId ) ); + cu.affine = m_BinDecoder.decodeBin( Ctx::SubblockMergeFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "subblock_merge_flag() subblock_merge_flag=%d ctx=%d pos=(%d,%d)\n", cu.affine ? 1 : 0, ctxId, cu.Y().x, cu.Y().y ); } } @@ -1533,23 +2073,85 @@ void CABACReader::merge_flag( PredictionUnit& pu ) if (pu.mergeFlag && CU::isIBC(*pu.cu)) { pu.mmvdMergeFlag = false; + pu.regularMergeFlag = false; return; } -#if JVET_MMVD_OFF_MACRO - pu.mmvdMergeFlag = false; -#else - if (pu.mergeFlag) - { - pu.mmvdMergeFlag = (m_BinDecoder.decodeBin(Ctx::MmvdFlag(0))); - DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 
1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); - } -#endif } void CABACReader::merge_data( PredictionUnit& pu ) { - if (pu.cu->mmvdSkip) + if (CU::isIBC(*pu.cu)) + { + merge_idx(pu); + return; + } + else + { + CodingUnit cu = *pu.cu; + subblock_merge_flag(*pu.cu); + if (pu.cu->affine) + { + merge_idx(pu); + cu.firstPU->regularMergeFlag = false; + return; + } + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__MERGE_FLAG ); + const bool triangleAvailable = pu.cu->cs->slice->getSPS()->getUseTriangle() && pu.cu->cs->slice->isInterB() && pu.cu->cs->picHeader->getMaxNumTriangleCand() > 1; + const bool ciipAvailable = pu.cs->sps->getUseCiip() && !pu.cu->skip && pu.cu->lwidth() < MAX_CU_SIZE && pu.cu->lheight() < MAX_CU_SIZE; + if (pu.cu->lwidth() * pu.cu->lheight() >= 64 + && (triangleAvailable || ciipAvailable)) + { + cu.firstPU->regularMergeFlag = m_BinDecoder.decodeBin(Ctx::RegularMergeFlag(cu.skip ? 0 : 1)); + } + else + { + cu.firstPU->regularMergeFlag = true; + } + if (cu.firstPU->regularMergeFlag) + { + if (cu.cs->slice->getSPS()->getUseMMVD()) + { + cu.firstPU->mmvdMergeFlag = m_BinDecoder.decodeBin(Ctx::MmvdFlag(0)); + } + else + { + cu.firstPU->mmvdMergeFlag = false; + } + if (cu.skip) + { + cu.mmvdSkip = cu.firstPU->mmvdMergeFlag; + } + } + else + { + pu.mmvdMergeFlag = false; + pu.cu->mmvdSkip = false; + if (triangleAvailable && ciipAvailable) + { + Ciip_flag(pu); + } + else if (ciipAvailable) + { + pu.ciipFlag = true; + } + else + { + pu.ciipFlag = false; + } + if (pu.ciipFlag) + { + pu.intraDir[0] = PLANAR_IDX; + pu.intraDir[1] = DM_CHROMA_IDX; + } + else + { + pu.cu->triangle = true; + } + } + } + if (pu.mmvdMergeFlag || pu.cu->mmvdSkip) { mmvd_merge_idx(pu); } @@ -1566,7 +2168,7 @@ void CABACReader::merge_idx( PredictionUnit& pu ) if ( pu.cu->affine ) { - int numCandminus1 = int( pu.cs->slice->getMaxNumAffineMergeCand() ) - 1; + int numCandminus1 = int( pu.cs->picHeader->getMaxNumAffineMergeCand() ) - 
1; pu.mergeIdx = 0; if ( numCandminus1 > 0 ) { @@ -1586,7 +2188,7 @@ void CABACReader::merge_idx( PredictionUnit& pu ) } else { - int numCandminus1 = int( pu.cs->slice->getMaxNumMergeCand() ) - 1; + int numCandminus1 = int( pu.cs->picHeader->getMaxNumMergeCand() ) - 1; pu.mergeIdx = 0; if( pu.cu->triangle ) @@ -1613,8 +2215,10 @@ void CABACReader::merge_idx( PredictionUnit& pu ) } return decIdx; }; - candIdx0 = decodeOneIdx(TRIANGLE_MAX_NUM_UNI_CANDS - 1); - candIdx1 = decodeOneIdx(TRIANGLE_MAX_NUM_UNI_CANDS - 2); + const int maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand(); + CHECK(maxNumTriangleCand < 2, "Incorrect max number of triangle candidates"); + candIdx0 = decodeOneIdx(maxNumTriangleCand - 1); + candIdx1 = decodeOneIdx(maxNumTriangleCand - 2); candIdx1 += candIdx1 >= candIdx0 ? 1 : 0; DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() triangle_split_dir=%d\n", splitDir ); DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() triangle_idx0=%d\n", candIdx0 ); @@ -1625,6 +2229,10 @@ void CABACReader::merge_idx( PredictionUnit& pu ) return; } + if (pu.cu->predMode == MODE_IBC) + { + numCandminus1 = int(pu.cs->picHeader->getMaxNumIBCMergeCand()) - 1; + } if( numCandminus1 > 0 ) { if( m_BinDecoder.decodeBin( Ctx::MergeIdx() ) ) @@ -1646,50 +2254,29 @@ void CABACReader::merge_idx( PredictionUnit& pu ) void CABACReader::mmvd_merge_idx(PredictionUnit& pu) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__MERGE_INDEX); - int var0, var1, var2; - int dir0 = 0; - int var = 0; - int mvpIdx = 0; - - pu.mmvdMergeIdx = 0; - mvpIdx = (var + dir0)*(MMVD_MAX_REFINE_NUM*MMVD_BASE_MV_NUM); - - int numCandminus1_base = MMVD_BASE_MV_NUM - 1; - var0 = 0; - if (numCandminus1_base > 0) + int var0 = 0; + if (pu.cs->picHeader->getMaxNumMergeCand() > 1) { - if (m_BinDecoder.decodeBin(Ctx::MmvdMergeIdx())) - { - var0++; - for (; var0 < numCandminus1_base; var0++) - { - if (!m_BinDecoder.decodeBinEP()) - { - break; - } - } - } + static_assert(MMVD_BASE_MV_NUM == 2, ""); + 
var0 = m_BinDecoder.decodeBin(Ctx::MmvdMergeIdx()); } DTRACE(g_trace_ctx, D_SYNTAX, "base_mvp_idx() base_mvp_idx=%d\n", var0); int numCandminus1_step = MMVD_REFINE_STEP - 1; - var1 = 0; - if (numCandminus1_step > 0) + int var1 = 0; + if (m_BinDecoder.decodeBin(Ctx::MmvdStepMvpIdx())) { - if (m_BinDecoder.decodeBin(Ctx::MmvdStepMvpIdx())) + var1++; + for (; var1 < numCandminus1_step; var1++) { - var1++; - for (; var1 < numCandminus1_step; var1++) + if (!m_BinDecoder.decodeBinEP()) { - if (!m_BinDecoder.decodeBinEP()) - { - break; - } + break; } } } DTRACE(g_trace_ctx, D_SYNTAX, "MmvdStepMvpIdx() MmvdStepMvpIdx=%d\n", var1); - var2 = 0; + int var2 = 0; if (m_BinDecoder.decodeBinEP()) { var2 += 2; @@ -1707,7 +2294,7 @@ void CABACReader::mmvd_merge_idx(PredictionUnit& pu) } } DTRACE(g_trace_ctx, D_SYNTAX, "pos() pos=%d\n", var2); - mvpIdx += (var0 * MMVD_MAX_REFINE_NUM + var1 * 4 + var2); + int mvpIdx = (var0 * MMVD_MAX_REFINE_NUM + var1 * 4 + var2); pu.mmvdMergeIdx = mvpIdx; DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_idx() mmvd_merge_idx=%d\n", pu.mmvdMergeIdx); } @@ -1731,13 +2318,13 @@ void CABACReader::inter_pred_idc( PredictionUnit& pu ) return; } } - if( m_BinDecoder.decodeBin( Ctx::InterDir(4) ) ) + if( m_BinDecoder.decodeBin( Ctx::InterDir(5) ) ) { - DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", 2, pu.lumaPos().x, pu.lumaPos().y ); + DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", 2, pu.lumaPos().x, pu.lumaPos().y ); pu.interDir = 2; return; } - DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", 1, pu.lumaPos().x, pu.lumaPos().y ); + DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", 1, pu.lumaPos().x, pu.lumaPos().y ); pu.interDir = 1; return; } @@ -1794,192 +2381,27 @@ void CABACReader::mvp_flag( PredictionUnit& pu, RefPicList eRefList ) } -void CABACReader::MHIntra_flag(PredictionUnit& pu) +void CABACReader::Ciip_flag(PredictionUnit& pu) 
{ - if (!pu.cs->sps->getUseMHIntra()) + if (!pu.cs->sps->getUseCiip()) { - pu.mhIntraFlag = false; + pu.ciipFlag = false; return; } if (pu.cu->skip) { - pu.mhIntraFlag = false; + pu.ciipFlag = false; return; } - if (pu.mmvdMergeFlag) - { - pu.mhIntraFlag = false; - return; - } - if (pu.cu->affine) - { - pu.mhIntraFlag = false; - return; - } - if (pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE) - { - pu.mhIntraFlag = false; - return; - } RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__MH_INTRA_FLAG); - pu.mhIntraFlag = (m_BinDecoder.decodeBin(Ctx::MHIntraFlag())); - DTRACE(g_trace_ctx, D_SYNTAX, "MHIntra_flag() MHIntra=%d pos=(%d,%d) size=%dx%d\n", pu.mhIntraFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); -} - -void CABACReader::MHIntra_luma_pred_modes(CodingUnit &cu) -{ - if (!cu.Y().valid()) - { - return; - } - - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA); - - const int numMPMs = 3; // Multi-hypothesis intra uses only 3 MPM - - // prev_intra_luma_pred_flag - int numBlocks = CU::getNumPUs(cu); - int mpmFlag[4]; - PredictionUnit *pu = cu.firstPU; - for (int k = 0; k < numBlocks; k++) - { - if (PU::getNarrowShape(pu->lwidth(), pu->lheight()) == 0) - { - mpmFlag[k] = m_BinDecoder.decodeBin(Ctx::MHIntraPredMode()); - } - else - { - mpmFlag[k] = 1; - } - } - - unsigned mpm_pred[numMPMs]; - for (int k = 0; k < numBlocks; k++) - { - PU::getMHIntraMPMs(*pu, mpm_pred); - - if (mpmFlag[k]) - { - unsigned pred_idx = 0; - - pred_idx = m_BinDecoder.decodeBinEP(); - if (pred_idx) - { - pred_idx += m_BinDecoder.decodeBinEP(); - } - pu->intraDir[0] = mpm_pred[pred_idx]; - } - else - { - unsigned pred_mode = 0; - - bool isMPMCand[4]; - for (unsigned i = 0; i < 4; i++) - { - isMPMCand[i] = false; - } - for (unsigned i = 0; i < 3; i++) - { - if (mpm_pred[i] == PLANAR_IDX) - { - 
isMPMCand[0] = true; - } - else if (mpm_pred[i] == DC_IDX) - { - isMPMCand[1] = true; - } - else if (mpm_pred[i] == HOR_IDX) - { - isMPMCand[2] = true; - } - else if (mpm_pred[i] == VER_IDX) - { - isMPMCand[3] = true; - } - } - if (!isMPMCand[0]) - { - pred_mode = PLANAR_IDX; - } - if (!isMPMCand[1]) - { - pred_mode = DC_IDX; - } - if (!isMPMCand[2]) - { - pred_mode = HOR_IDX; - } - if (!isMPMCand[3]) - { - pred_mode = VER_IDX; - } - pu->intraDir[0] = pred_mode; - } - DTRACE(g_trace_ctx, D_SYNTAX, "intra_luma_pred_modes() idx=%d pos=(%d,%d) mode=%d\n", k, pu->lumaPos().x, pu->lumaPos().y, pu->intraDir[0]); - pu = pu->next; - } + pu.ciipFlag = (m_BinDecoder.decodeBin(Ctx::CiipFlag())); + DTRACE(g_trace_ctx, D_SYNTAX, "Ciip_flag() Ciip=%d pos=(%d,%d) size=%dx%d\n", pu.ciipFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); } -void CABACReader::triangle_mode( CodingUnit& cu ) -{ - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__TRIANGLE_FLAG ); - - if( !cu.cs->slice->getSPS()->getUseTriangle() || !cu.cs->slice->isInterB() || cu.lwidth() * cu.lheight() < TRIANGLE_MIN_SIZE || cu.affine ) - { - return; - } - if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip ) - { - return; - } - if ( cu.firstPU->mhIntraFlag ) - { - return; - } - - unsigned flag_idx = DeriveCtx::CtxTriangleFlag( cu ); - cu.triangle = m_BinDecoder.decodeBin( Ctx::TriangleFlag(flag_idx) ); - - - DTRACE( g_trace_ctx, D_SYNTAX, "triangle_mode() triangle_mode=%d pos=(%d,%d) size: %dx%d\n", cu.triangle, cu.Y().x, cu.Y().y, cu.lumaSize().width, cu.lumaSize().height ); -} - -//================================================================================ -// clause 7.3.8.7 -//-------------------------------------------------------------------------------- -// void pcm_samples( tu ) -//================================================================================ - -void CABACReader::pcm_samples( TransformUnit& tu ) -{ - CHECK( !tu.cu->ipcm, "pcm mode 
expected" ); - - const CodingStructure *cs = tu.cs; - const ChannelType chType = tu.chType; - - const SPS& sps = *tu.cu->cs->sps; - tu.depth = 0; - - ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y; - ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? COMPONENT_Y : COMPONENT_Cr; - for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) ) - { - PelBuf samples = tu.getPcmbuf( compID ); - const unsigned sampleBits = sps.getPCMBitDepth( toChannelType(compID) ); - for( unsigned y = 0; y < samples.height; y++ ) - { - for( unsigned x = 0; x < samples.width; x++ ) - { - samples.at(x, y) = m_BinDecoder.decodeBinsPCM( sampleBits ); - } - } - } - m_BinDecoder.start(); -} //================================================================================ // clause 7.3.8.8 @@ -1989,21 +2411,16 @@ void CABACReader::pcm_samples( TransformUnit& tu ) // bool cbf_comp ( area, depth ) //================================================================================ -void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType, const int subTuIdx ) +void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx, const PartSplit ispType, const int subTuIdx ) { - ChromaCbfs chromaCbfsLastDepth; - chromaCbfsLastDepth.Cb = chromaCbfs.Cb; - chromaCbfsLastDepth.Cr = chromaCbfs.Cr; - const UnitArea& area = partitioner.currArea(); - - CodingUnit& cu = *cs.getCU( area.blocks[partitioner.chType], partitioner.chType ); - const unsigned trDepth = partitioner.currTrDepth; - int subTuCounter = subTuIdx; + const UnitArea& area = partitioner.currArea(); + CodingUnit& cu = *cs.getCU(area.blocks[partitioner.chType], partitioner.chType); + int subTuCounter = subTuIdx; // split_transform_flag - bool split = false; + bool split = partitioner.canSplit(TU_MAX_TR_SPLIT, cs); + const unsigned trDepth = 
partitioner.currTrDepth; - split = partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) { split = true; @@ -2013,27 +2430,6 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, { split = partitioner.canSplit( ispType, cs ); } - const bool chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode && !split; - - // cbf_cb & cbf_cr - if( area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && ( !CS::isDualITree( cs ) || partitioner.chType == CHANNEL_TYPE_CHROMA ) && ( !cu.ispMode || chromaCbfISP ) ) - { - const int cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth; - if (chromaCbfs.Cb) - { - if (!(cu.sbtInfo && trDepth == 1)) - chromaCbfs.Cb &= cbf_comp(cs, area.blocks[COMPONENT_Cb], cbfDepth); - } - if (chromaCbfs.Cr) - { - if (!(cu.sbtInfo && trDepth == 1)) - chromaCbfs.Cr &= cbf_comp(cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb); - } - } - else if( CS::isDualITree( cs ) ) - { - chromaCbfs = ChromaCbfs( false ); - } if( split ) { @@ -2062,45 +2458,18 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, do { - ChromaCbfs subCbfs = chromaCbfs; - transform_tree( cs, partitioner, cuCtx, subCbfs, ispType, subTuCounter ); + transform_tree( cs, partitioner, cuCtx, ispType, subTuCounter ); subTuCounter += subTuCounter != -1 ? 1 : 0; } while( partitioner.nextPart( cs ) ); partitioner.exitCurrSplit(); - const UnitArea &currArea = partitioner.currArea(); - const unsigned currDepth = partitioner.currTrDepth; - const unsigned numTBlocks = getNumberValidTBlocks( *cs.pcv ); - - unsigned compCbf[3] = { 0, 0, 0 }; - unsigned cbfDepth = 0; - for( auto &currTU : cs.traverseTUs( currArea, partitioner.chType ) ) - { - for( unsigned ch = 0; ch < numTBlocks; ch++ ) - { - cbfDepth = !isLuma( ComponentID( ch ) ) && cu.ispMode ? currDepth : currDepth + 1; - compCbf[ch] |= ( TU::getCbfAtDepth( currTU, ComponentID( ch ), cbfDepth ) ? 
1 : 0 ); - } - } - - for (auto &currTU: cs.traverseTUs(currArea, partitioner.chType)) - { - TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]); - if (currArea.chromaFormat != CHROMA_400) - { - TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]); - TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]); - } - } } else { TransformUnit &tu = cs.addTU( CS::getArea( cs, area, partitioner.chType ), partitioner.chType ); unsigned numBlocks = ::getNumberValidTBlocks( *cs.pcv ); tu.checkTuNoResidual( partitioner.currPartIdx() ); - chromaCbfs.Cb &= !tu.noResidual; - chromaCbfs.Cr &= !tu.noResidual; for( unsigned compID = COMPONENT_Y; compID < numBlocks; compID++ ) { @@ -2113,70 +2482,33 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, tu.depth = trDepth; DTRACE( g_trace_ctx, D_SYNTAX, "transform_unit() pos=(%d,%d) size=%dx%d depth=%d trDepth=%d\n", tu.blocks[tu.chType].x, tu.blocks[tu.chType].y, tu.blocks[tu.chType].width, tu.blocks[tu.chType].height, cu.depth, partitioner.currTrDepth ); - if( !isChroma( partitioner.chType ) ) - { - if( !CU::isIntra( cu ) && trDepth == 0 && !chromaCbfs.sigChroma( area.chromaFormat ) ) - { - TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 ); - } - else if( cu.sbtInfo && tu.noResidual ) - { - TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 0 ); - } - else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) ) - { - assert( !tu.noResidual ); - TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 ); - } - else - { - bool previousCbf = false; - bool rootCbfSoFar = false; - bool lastCbfIsInferred = false; - if( cu.ispMode ) - { - uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? 
cu.lheight() >> g_aucLog2[tu.lheight()] : cu.lwidth() >> g_aucLog2[tu.lwidth()]; - if( subTuCounter == nTus - 1 ) - { - TransformUnit* tuPointer = cu.firstTU; - for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) - { - rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, trDepth ); - tuPointer = tuPointer->next; - } - if( !rootCbfSoFar ) - { - lastCbfIsInferred = true; - } - } - if( !lastCbfIsInferred ) - { - previousCbf = TU::getPrevTuCbfAtDepth( tu, COMPONENT_Y, trDepth ); - } - } - bool cbfY = lastCbfIsInferred ? true : cbf_comp( cs, tu.Y(), trDepth, previousCbf, cu.ispMode ); - TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, ( cbfY ? 1 : 0 ) ); - } - } - if( area.chromaFormat != CHROMA_400 && ( !cu.ispMode || chromaCbfISP ) ) - { - TU::setCbfAtDepth( tu, COMPONENT_Cb, trDepth, ( chromaCbfs.Cb ? 1 : 0 ) ); - TU::setCbfAtDepth( tu, COMPONENT_Cr, trDepth, ( chromaCbfs.Cr ? 1 : 0 ) ); - } - - - transform_unit( tu, cuCtx, chromaCbfs ); + transform_unit(tu, cuCtx, partitioner, subTuCounter); } } bool CABACReader::cbf_comp( CodingStructure& cs, const CompArea& area, unsigned depth, const bool prevCbf, const bool useISP ) { - const unsigned ctxId = DeriveCtx::CtxQtCbf( area.compID, depth, prevCbf, useISP && isLuma( area.compID ) ); + unsigned ctxId = DeriveCtx::CtxQtCbf(area.compID, prevCbf, useISP && isLuma(area.compID)); const CtxSet& ctxSet = Ctx::QtCbf[ area.compID ]; RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__QT_CBF, area.size(), area.compID); - const unsigned cbf = m_BinDecoder.decodeBin( ctxSet( ctxId ) ); + unsigned cbf = 0; + if( (area.compID == COMPONENT_Y && cs.getCU(area.pos(), ChannelType(area.compID))->bdpcmMode) + || (area.compID != COMPONENT_Y && cs.getCU(area.pos(), ChannelType(area.compID))->bdpcmModeChroma)) + { + if (area.compID == COMPONENT_Y) + ctxId = 1; + else if (area.compID == COMPONENT_Cb) + ctxId = 1; + else + ctxId = 2; + cbf = m_BinDecoder.decodeBin(ctxSet(ctxId)); + } + else + { + cbf = m_BinDecoder.decodeBin( 
ctxSet( ctxId ) ); + } DTRACE( g_trace_ctx, D_SYNTAX, "cbf_comp() etype=%d pos=(%d,%d) ctx=%d cbf=%d\n", area.compID, area.x, area.y, ctxId, cbf ); return cbf; @@ -2218,7 +2550,7 @@ void CABACReader::mvd_coding( Mv &rMvd ) { if (horAbs > 1) { - horAbs += exp_golomb_eqprob(1 ); + horAbs += m_BinDecoder.decodeRemAbsEP(1, 0, MV_BITS - 1); } if (m_BinDecoder.decodeBinEP()) { @@ -2229,51 +2561,139 @@ void CABACReader::mvd_coding( Mv &rMvd ) { if (verAbs > 1) { - verAbs += exp_golomb_eqprob(1 ); + verAbs += m_BinDecoder.decodeRemAbsEP(1, 0, MV_BITS - 1); } if (m_BinDecoder.decodeBinEP()) { verAbs = -verAbs; } } - rMvd = Mv(horAbs, verAbs); -} - - -//================================================================================ -// clause 7.3.8.10 -//-------------------------------------------------------------------------------- -// void transform_unit ( tu, cuCtx, chromaCbfs ) -// void cu_qp_delta ( cu ) -// void cu_chroma_qp_offset ( cu ) -//================================================================================ - -void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ) -{ - CodingUnit& cu = *tu.cu; + rMvd = Mv(horAbs, verAbs); + CHECK(!((horAbs >= MVD_MIN) && (horAbs <= MVD_MAX)) || !((verAbs >= MVD_MIN) && (verAbs <= MVD_MAX)), "Illegal MVD value"); +} + + +//================================================================================ +// clause 7.3.8.10 +//-------------------------------------------------------------------------------- +// void transform_unit ( tu, cuCtx, chromaCbfs ) +// void cu_qp_delta ( cu ) +// void cu_chroma_qp_offset ( cu ) +//================================================================================ +void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, Partitioner& partitioner, const int subTuCounter) +{ + const UnitArea& area = partitioner.currArea(); + const unsigned trDepth = partitioner.currTrDepth; + + CodingStructure& cs = *tu.cs; + CodingUnit& cu = *tu.cu; + 
ChromaCbfs chromaCbfs; + chromaCbfs.Cb = chromaCbfs.Cr = false; + + const bool chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode; + + // cbf_cb & cbf_cr + if (area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && (!cu.isSepTree() || partitioner.chType == CHANNEL_TYPE_CHROMA) && (!cu.ispMode || chromaCbfISP)) + { + const int cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth; + { + if (!(cu.sbtInfo && tu.noResidual)) + chromaCbfs.Cb = cbf_comp(cs, area.blocks[COMPONENT_Cb], cbfDepth); + + if (!(cu.sbtInfo && tu.noResidual)) + chromaCbfs.Cr = cbf_comp(cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb); + } + } + else if (cu.isSepTree()) + { + chromaCbfs = ChromaCbfs(false); + } + + if (!isChroma(partitioner.chType)) + { + if (!CU::isIntra(cu) && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat)) + { + TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 1); + } + else if (cu.sbtInfo && tu.noResidual) + { + TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 0); + } + else if (cu.sbtInfo && !chromaCbfs.sigChroma(area.chromaFormat)) + { + assert(!tu.noResidual); + TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 1); + } + else + { + bool lumaCbfIsInferredACT = (cu.colorTransform && cu.predMode == MODE_INTRA && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat)); + bool lastCbfIsInferred = lumaCbfIsInferredACT; // ISP and ACT are mutually exclusive + bool previousCbf = false; + bool rootCbfSoFar = false; + if (cu.ispMode) + { + uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? 
cu.lheight() >> floorLog2(tu.lheight()) : cu.lwidth() >> floorLog2(tu.lwidth()); + if (subTuCounter == nTus - 1) + { + TransformUnit* tuPointer = cu.firstTU; + for (int tuIdx = 0; tuIdx < nTus - 1; tuIdx++) + { + rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, trDepth); + tuPointer = tuPointer->next; + } + if (!rootCbfSoFar) + { + lastCbfIsInferred = true; + } + } + if (!lastCbfIsInferred) + { + previousCbf = TU::getPrevTuCbfAtDepth(tu, COMPONENT_Y, trDepth); + } + } + bool cbfY = lastCbfIsInferred ? true : cbf_comp(cs, tu.Y(), trDepth, previousCbf, cu.ispMode); + TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, (cbfY ? 1 : 0)); + } + } + if (area.chromaFormat != CHROMA_400 && (!cu.ispMode || chromaCbfISP)) + { + TU::setCbfAtDepth(tu, COMPONENT_Cb, trDepth, (chromaCbfs.Cb ? 1 : 0)); + TU::setCbfAtDepth(tu, COMPONENT_Cr, trDepth, (chromaCbfs.Cr ? 1 : 0)); + } bool lumaOnly = ( cu.chromaFormat == CHROMA_400 || !tu.blocks[COMPONENT_Cb].valid() ); bool cbfLuma = ( tu.cbf[ COMPONENT_Y ] != 0 ); bool cbfChroma = ( lumaOnly ? false : ( chromaCbfs.Cb || chromaCbfs.Cr ) ); - if( cbfLuma || cbfChroma ) + if( ( cu.lwidth() > 64 || cu.lheight() > 64 || cbfLuma || cbfChroma ) && + (!tu.cu->isSepTree() || isLuma(tu.chType)) ) { if( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded ) { - if (!CS::isDualITree(*tu.cs) || isLuma(tu.chType)) - { - cu_qp_delta(cu, cuCtx.qp, cu.qp); - cuCtx.qp = cu.qp; - cuCtx.isDQPCoded = true; - } + cu_qp_delta(cu, cuCtx.qp, cu.qp); + cuCtx.qp = cu.qp; + cuCtx.isDQPCoded = true; } - if( cu.cs->slice->getUseChromaQpAdj() && cbfChroma && !cu.transQuantBypass && !cuCtx.isChromaQpAdjCoded ) + } + if (!cu.isSepTree() || isChroma(tu.chType)) // !DUAL_TREE_LUMA + { + SizeType channelWidth = !cu.isSepTree() ? cu.lwidth() : cu.chromaSize().width; + SizeType channelHeight = !cu.isSepTree() ? 
cu.lheight() : cu.chromaSize().height; + + if (cu.cs->slice->getUseChromaQpAdj() && (channelWidth > 64 || channelHeight > 64 || cbfChroma) && !cuCtx.isChromaQpAdjCoded) { - cu_chroma_qp_offset( cu ); + cu_chroma_qp_offset(cu); cuCtx.isChromaQpAdjCoded = true; } + } + + if( !lumaOnly ) + { + joint_cb_cr( tu, ( tu.cbf[COMPONENT_Cb] ? 2 : 0 ) + ( tu.cbf[COMPONENT_Cr] ? 1 : 0 ) ); + } + if( cbfLuma ) { - residual_coding( tu, COMPONENT_Y ); + residual_coding( tu, COMPONENT_Y, cuCtx ); } if( !lumaOnly ) { @@ -2285,8 +2705,7 @@ void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& c } if( tu.cbf[ compID ] ) { - residual_coding( tu, compID ); - } + residual_coding( tu, compID, cuCtx ); } } } @@ -2323,7 +2742,7 @@ void CABACReader::cu_chroma_qp_offset( CodingUnit& cu ) RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__CHROMA_QP_ADJUSTMENT, cu.blocks[cu.chType].lumaSize(), CHANNEL_TYPE_CHROMA ); // cu_chroma_qp_offset_flag - int length = cu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListLen(); + int length = cu.cs->pps->getChromaQpOffsetListLen(); unsigned qpAdj = m_BinDecoder.decodeBin( Ctx::ChromaQpAdjFlag() ); if( qpAdj && length > 1 ) { @@ -2346,20 +2765,41 @@ void CABACReader::cu_chroma_qp_offset( CodingUnit& cu ) // void residual_coding_subblock( coeffCtx ) //================================================================================ -void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) +void CABACReader::joint_cb_cr( TransformUnit& tu, const int cbfMask ) +{ + if ( !tu.cu->slice->getSPS()->getJointCbCrEnabledFlag() ) + { + return; + } + + if( ( CU::isIntra( *tu.cu ) && cbfMask ) || ( cbfMask == 3 ) ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__JOINT_CB_CR, tu.blocks[COMPONENT_Cr].lumaSize(), CHANNEL_TYPE_CHROMA ); + tu.jointCbCr = ( m_BinDecoder.decodeBin( Ctx::JointCbCrFlag( cbfMask-1 ) ) ? 
cbfMask : 0 ); + } +} + +void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& cuCtx ) { const CodingUnit& cu = *tu.cu; DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode ); + if( compID == COMPONENT_Cr && tu.jointCbCr == 3 ) + return; + // parse transform skip and explicit rdpcm mode - mts_coding ( tu, compID ); + ts_flag ( tu, compID ); explicit_rdpcm_mode( tu, compID ); + if (tu.mtsIdx[compID] == MTS_SKIP) + { + residual_codingTS( tu, compID ); + return; + } -#if HEVC_USE_SIGN_HIDING // determine sign hiding - bool signHiding = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF ); - if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 ) + bool signHiding = ( cu.cs->picHeader->getSignDataHidingEnabledFlag() && tu.rdpcm[compID] == RDPCM_OFF ); + if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx[compID] == MTS_SKIP ) { const ChannelType chType = toChannelType( compID ); const unsigned intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType ); @@ -2368,28 +2808,42 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) signHiding = false; } } -#endif // init coeff coding context -#if HEVC_USE_SIGN_HIDING CoeffCodingContext cctx ( tu, compID, signHiding ); -#else - CoeffCodingContext cctx ( tu, compID ); -#endif TCoeff* coeff = tu.getCoeffs( compID ).buf; // parse last coeff position cctx.setScanPosLast( last_sig_coeff( cctx, tu, compID ) ); + if (tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4 ) + { + const int maxLfnstPos = ((tu.blocks[compID].height == 4 && tu.blocks[compID].width == 4) || (tu.blocks[compID].height == 8 && tu.blocks[compID].width == 8)) ? 
7 : 15; + cuCtx.violatesLfnstConstrained[ toChannelType(compID) ] |= cctx.scanPosLast() > maxLfnstPos; + } + if( tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4 ) + { + const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA; + cuCtx.lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh; + } +#if !JVET_Q0055_MTS_SIGNALLING + if( isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) ) + { + cuCtx.violatesMtsCoeffConstraint = true; + } +#endif // parse subblocks - const int stateTransTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 ); + const int stateTransTab = ( tu.cs->picHeader->getDepQuantEnabledFlag() ? 32040 : 0 ); int state = 0; + int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4; for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId ); - if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) + + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && compID == COMPONENT_Y ) { if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) ) { @@ -2397,91 +2851,83 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID ) } } residual_coding_subblock( cctx, coeff, stateTransTab, state ); + +#if JVET_Q0055_MTS_SIGNALLING + if ( isLuma(compID) && cctx.isSigGroup() && ( cctx.cgPosY() > 3 || cctx.cgPosX() > 3 ) ) + { + cuCtx.violatesMtsCoeffConstraint = true; + } 
+#endif } } -void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID ) +void CABACReader::ts_flag( TransformUnit& tu, ComponentID compID ) { - const CodingUnit &cu = *tu.cu; - const bool tsAllowed = TU::isTSAllowed ( tu, compID ); - const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); - - if( !mtsAllowed && !tsAllowed ) return; + int tsFlag = ( (tu.cu->bdpcmMode && isLuma(compID)) || (tu.cu->bdpcmModeChroma && isChroma(compID)) ) ? 1 : tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; + int ctxIdx = isLuma(compID) ? 0 : 1; - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID ); - - int symbol = 0; - int ctxIdx = 0; - - if( tsAllowed ) + if( TU::isTSAllowed ( tu, compID ) ) { - ctxIdx = 6; - symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); - tu.mtsIdx = 1-symbol; // 1 = TS + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID ); + tsFlag = m_BinDecoder.decodeBin( Ctx::TransformSkipFlag( ctxIdx ) ); } + + tu.mtsIdx[compID] = tsFlag ? 
MTS_SKIP : MTS_DCT2_DCT2; + + DTRACE(g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag); +} - if( tu.mtsIdx != 1 ) - { - if( mtsAllowed ) +void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) +{ + TransformUnit &tu = *cu.firstTU; + int mtsIdx = tu.mtsIdx[COMPONENT_Y]; // Transform skip flag has already been decoded + + if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint && + cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y ); + int ctxIdx = 0; + int symbol = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx)); + + if( symbol ) { - ctxIdx = std::min( (int)cu.qtDepth, 5 ); - symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); - - if( symbol ) + ctxIdx = 1; + mtsIdx = MTS_DST7_DST7; // mtsIdx = 2 -- 4 + for( int i = 0; i < 3; i++, ctxIdx++ ) { - ctxIdx = 7; - tu.mtsIdx = 2; // mtsIdx = 2 -- 4 - for( int i = 0; i < 3; i++, ctxIdx++ ) + symbol = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx)); + mtsIdx += symbol; + + if( !symbol ) { - symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) ); - tu.mtsIdx += symbol; - - if( !symbol ) - { - break; - } + break; } } } } - DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx ); + + tu.mtsIdx[COMPONENT_Y] = mtsIdx; + + DTRACE(g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); } - + void CABACReader::isp_mode( CodingUnit& cu ) { - if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || cu.ipcm ) + if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform ) { cu.ispMode = 
NOT_INTRA_SUBPARTITIONS; return; } - const ISPType allowedSplits = CU::canUseISPSplit( cu, getFirstComponentOfChannel( cu.chType ) ); - if( allowedSplits == NOT_INTRA_SUBPARTITIONS ) - { - cu.ispMode = NOT_INTRA_SUBPARTITIONS; - return; - } + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ISP_MODE_FLAG); - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_MODE_FLAG ); - cu.ispMode = NOT_INTRA_SUBPARTITIONS; - int symbol = m_BinDecoder.decodeBin( Ctx::ISPMode( 0 ) ); + int symbol = m_BinDecoder.decodeBin(Ctx::ISPMode(0)); if( symbol ) { - if( allowedSplits == HOR_INTRA_SUBPARTITIONS ) - { - cu.ispMode = HOR_INTRA_SUBPARTITIONS; - } - else if( allowedSplits == VER_INTRA_SUBPARTITIONS ) - { - cu.ispMode = VER_INTRA_SUBPARTITIONS; - } - else - { - RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_SPLIT_FLAG ); - cu.ispMode = 1 + m_BinDecoder.decodeBin( Ctx::ISPMode( 1 ) ); - } + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_SPLIT_FLAG ); + cu.ispMode = 1 + m_BinDecoder.decodeBin( Ctx::ISPMode( 1 ) ); } DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode ); } @@ -2492,7 +2938,7 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID ) tu.rdpcm[compID] = RDPCM_OFF; - if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) ) + if (!CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx[compID] == MTS_SKIP)) { RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE( STATS__EXPLICIT_RDPCM_BITS, tu.blocks[tu.chType].lumaSize() ); @@ -2511,6 +2957,51 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID ) } } +void CABACReader::residual_lfnst_mode( CodingUnit& cu, CUCtx& cuCtx ) +{ + int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 
1 : 0; + if ( (cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) || + (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || + ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 ) + || ( cu.blocks[ chIdx ].lumaSize().width > cu.cs->sps->getMaxTbSize() || cu.blocks[ chIdx ].lumaSize().height > cu.cs->sps->getMaxTbSize() ) + ) + { + return; + } + + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__LFNST ); + + if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) ) + { + const bool lumaFlag = cu.isSepTree() ? ( isLuma( cu.chType ) ? true : false ) : true; + const bool chromaFlag = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true; + bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] ); + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; + if ((!cuCtx.lfnstLastScanPos && !cu.ispMode) || nonZeroCoeffNonTsCorner8x8 || isTrSkip) + { + cu.lfnstIdx = 0; + return; + } + } + else + { + cu.lfnstIdx = 0; + return; + } + + + unsigned cctx = 0; + if ( cu.isSepTree() ) cctx++; + + uint32_t idxLFNST = m_BinDecoder.decodeBin( Ctx::LFNSTIdx( cctx ) ); + if( idxLFNST ) + { + idxLFNST += m_BinDecoder.decodeBin(Ctx::LFNSTIdx(2)); + } + cu.lfnstIdx = idxLFNST; + + DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx ); +} int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, ComponentID compID ) { @@ -2520,7 +3011,7 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co unsigned maxLastPosX = cctx.maxLastPosX(); unsigned maxLastPosY = cctx.maxLastPosY(); - if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID 
].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y ) { maxLastPosX = ( tu.blocks[ compID ].width == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX; maxLastPosY = ( tu.blocks[ compID ].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY; @@ -2562,13 +3053,6 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co } int blkPos; -#if HEVC_USE_MDCS - if( cctx.scanType() == SCAN_VER ) - { - blkPos = PosLastY + ( PosLastX * cctx.width() ); - } - else -#endif { blkPos = PosLastX + ( PosLastY * cctx.width() ); } @@ -2624,13 +3108,10 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co //===== decode absolute values ===== const int inferSigPos = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos; -#if HEVC_USE_SIGN_HIDING int firstNZPos = nextSigPos; int lastNZPos = -1; -#endif int numNonZero = 0; - bool is2x2subblock = ( cctx.log2CGSize() == 2 ); - int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); + int remRegBins = cctx.regBinLimit; int firstPosMode2 = minSubPos - 1; int sigBlkPos[ 1 << MLS_CG_SIZE ]; @@ -2646,16 +3127,18 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); remRegBins--; } + else if( nextSigPos != cctx.scanPosLast() ) + { + cctx.sigCtxIdAbs( nextSigPos, coeff, state ); // required for setting variables that are needed for gtx/par context selection + } if( sigFlag ) { uint8_t& ctxOff = ctxOffset[ nextSigPos - minSubPos ]; ctxOff = cctx.ctxOffsetAbs(); sigBlkPos[ numNonZero++ ] = blkPos; -#if HEVC_USE_SIGN_HIDING firstNZPos = nextSigPos; lastNZPos = std::max<int>( lastNZPos, nextSigPos ); -#endif RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt1 ); unsigned gt1Flag = m_BinDecoder.decodeBin( cctx.greater1CtxIdAbs(ctxOff) ); @@ -2682,33 +3165,33 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co state = ( stateTransTable >> ((state<<2)+((coeff[blkPos]&1)<<1)) ) & 3; } firstPosMode2 = nextSigPos; + cctx.regBinLimit = remRegBins; //===== 2nd PASS: Go-rice codes ===== unsigned ricePar = 0; for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- ) { + int sumAll = cctx.templateAbsSum(scanPos, coeff, 4); + ricePar = g_auiGoRiceParsCoeff[sumAll]; TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; if( tcoeff >= 4 ) { RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs ); - int rem = m_BinDecoder.decodeRemAbsEP( ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + int rem = m_BinDecoder.decodeRemAbsEP( ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); tcoeff += (rem<<1); - if( ricePar < 3 && rem > (3<<ricePar)-1 ) - { - ricePar++; - } } } //===== coeff bypass ==== for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- ) { - int 
sumAll = cctx.templateAbsSum(scanPos, coeff); + int sumAll = cctx.templateAbsSum(scanPos, coeff, 0); int rice = g_auiGoRiceParsCoeff [sumAll]; - int pos0 = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll]; - int rem = m_BinDecoder.decodeRemAbsEP( rice, cctx.extPrec(), cctx.maxLog2TrDRange() ); + int pos0 = g_auiGoRicePosCoeff0(state, rice); + RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_escs); + int rem = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice ); TCoeff tcoeff = ( rem == pos0 ? 0 : rem < pos0 ? rem+1 : rem ); state = ( stateTransTable >> ((state<<2)+((tcoeff&1)<<1)) ) & 3; @@ -2716,36 +3199,26 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co { int blkPos = cctx.blockPos( scanPos ); sigBlkPos[ numNonZero++ ] = blkPos; -#if HEVC_USE_SIGN_HIDING + firstNZPos = scanPos; lastNZPos = std::max<int>( lastNZPos, scanPos ); -#endif coeff[blkPos] = tcoeff; } } //===== decode sign's ===== RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SIGN_BIT, Size( cctx.width(), cctx.height() ), cctx.compID() ); -#if HEVC_USE_SIGN_HIDING const unsigned numSigns = ( cctx.hideSign( firstNZPos, lastNZPos ) ? numNonZero - 1 : numNonZero ); unsigned signPattern = m_BinDecoder.decodeBinsEP( numSigns ) << ( 32 - numSigns ); -#else - unsigned signPattern = m_BinDecoder.decodeBinsEP( numNonZero ) << ( 32 - numNonZero ); -#endif //===== set final coefficents ===== int sumAbs = 0; -#if HEVC_USE_SIGN_HIDING for( unsigned k = 0; k < numSigns; k++ ) -#else - for( unsigned k = 0; k < numNonZero; k++ ) -#endif { int AbsCoeff = coeff[ sigBlkPos[ k ] ]; sumAbs += AbsCoeff; coeff[ sigBlkPos[k] ] = ( signPattern & ( 1u << 31 ) ? 
-AbsCoeff : AbsCoeff ); signPattern <<= 1; } -#if HEVC_USE_SIGN_HIDING if( numNonZero > numSigns ) { int k = numSigns; @@ -2753,9 +3226,199 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co sumAbs += AbsCoeff; coeff[ sigBlkPos[k] ] = ( sumAbs & 1 ? -AbsCoeff : AbsCoeff ); } +} + +void CABACReader::residual_codingTS( TransformUnit& tu, ComponentID compID ) +{ + DTRACE( g_trace_ctx, D_SYNTAX, "residual_codingTS() etype=%d pos=(%d,%d) size=%dx%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height ); + + // init coeff coding context + CoeffCodingContext cctx ( tu, compID, false, isLuma(compID) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma); + TCoeff* coeff = tu.getCoeffs( compID ).buf; + int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; + cctx.setNumCtxBins(maxCtxBins); + + for( int subSetId = 0; subSetId <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); subSetId++ ) + { + cctx.initSubblock ( subSetId ); + residual_coding_subblockTS( cctx, coeff ); + } +} + +void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* coeff ) +{ + // NOTE: All coefficients of the subblock must be set to zero before calling this function +#if RExt__DECODER_DEBUG_BIT_STATISTICS + CodingStatisticsClassType ctype_group ( STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG, cctx.width(), cctx.height(), cctx.compID() ); +#if TR_ONLY_COEFF_STATS + CodingStatisticsClassType ctype_map ( STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG_TS, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_par ( STATS__CABAC_BITS__PAR_FLAG_TS, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_gt1 ( STATS__CABAC_BITS__GT1_FLAG_TS, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_gt2 ( STATS__CABAC_BITS__GT2_FLAG_TS, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_escs ( 
STATS__CABAC_BITS__ESCAPE_BITS_TS, cctx.width(), cctx.height(), cctx.compID() ); +#else + CodingStatisticsClassType ctype_map ( STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_par ( STATS__CABAC_BITS__PAR_FLAG, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_gt1 ( STATS__CABAC_BITS__GT1_FLAG, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_gt2 ( STATS__CABAC_BITS__GT2_FLAG, cctx.width(), cctx.height(), cctx.compID() ); + CodingStatisticsClassType ctype_escs ( STATS__CABAC_BITS__ESCAPE_BITS, cctx.width(), cctx.height(), cctx.compID() ); +#endif + +#endif + + //===== init ===== + const int minSubPos = cctx.maxSubPos(); + int firstSigPos = cctx.minSubPos(); + int nextSigPos = firstSigPos; + unsigned signPattern = 0; + + //===== decode significant_coeffgroup_flag ===== + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_group ); + bool sigGroup = cctx.isLastSubSet() && cctx.noneSigGroup(); + if( !sigGroup ) + { + sigGroup = m_BinDecoder.decodeBin( cctx.sigGroupCtxId( true ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", sigGroup, cctx.sigGroupCtxId() ); + } + if( sigGroup ) + { + cctx.setSigGroup(); + } + else + { + return; + } + + //===== decode absolute values ===== + const int inferSigPos = minSubPos; + int numNonZero = 0; + int sigBlkPos[ 1 << MLS_CG_SIZE ]; + + int lastScanPosPass1 = -1; + int lastScanPosPass2 = -1; + for (; nextSigPos <= minSubPos && cctx.numCtxBins() >= 4; nextSigPos++) + { + int blkPos = cctx.blockPos( nextSigPos ); + unsigned sigFlag = ( !numNonZero && nextSigPos == inferSigPos ); + if( !sigFlag ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_map ); + const unsigned sigCtxId = cctx.sigCtxIdAbsTS( nextSigPos, coeff ); + sigFlag = m_BinDecoder.decodeBin( sigCtxId ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); + cctx.decimateNumCtxBins(1); 
+ } + + if( sigFlag ) + { + //===== decode sign's ===== +#if TR_ONLY_COEFF_STATS + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__SIGN_BIT_TS, Size(cctx.width(), cctx.height()), cctx.compID()); +#else + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SIGN_BIT, Size( cctx.width(), cctx.height() ), cctx.compID() ); #endif + int sign; + const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm()); + sign = m_BinDecoder.decodeBin(signCtxId); + cctx.decimateNumCtxBins(1); + + signPattern += ( sign << numNonZero ); + + sigBlkPos[numNonZero++] = blkPos; + + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt1 ); + unsigned gt1Flag; + const unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm()); + gt1Flag = m_BinDecoder.decodeBin(gt1CtxId); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1Flag, gt1CtxId ); + cctx.decimateNumCtxBins(1); + + unsigned parFlag = 0; + if( gt1Flag ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_par ); + parFlag = m_BinDecoder.decodeBin( cctx.parityCtxIdAbsTS() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbsTS() ); + cctx.decimateNumCtxBins(1); + } + coeff[ blkPos ] = (sign ? 
-1 : 1 ) * (1 + parFlag + gt1Flag); + } + lastScanPosPass1 = nextSigPos; + } + + int cutoffVal = 2; + const int numGtBins = 4; + + //===== 2nd PASS: gt2 ===== + for (int scanPos = firstSigPos; scanPos <= minSubPos && cctx.numCtxBins() >= 4; scanPos++) + { + TCoeff& tcoeff = coeff[cctx.blockPos(scanPos)]; + cutoffVal = 2; + for (int i = 0; i < numGtBins; i++) + { + if( tcoeff < 0) + { + tcoeff = -tcoeff; + } + if (tcoeff >= cutoffVal) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_gt2); + unsigned gt2Flag; + gt2Flag = m_BinDecoder.decodeBin(cctx.greaterXCtxIdAbsTS(cutoffVal >> 1)); + tcoeff += (gt2Flag << 1); + DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt%d_flag() bin=%d ctx=%d sp=%d coeff=%d\n", i, gt2Flag, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1), scanPos, tcoeff); + cctx.decimateNumCtxBins(1); + } + cutoffVal += 2; + } + lastScanPosPass2 = scanPos; + } + //===== 3rd PASS: Go-rice codes ===== + for( int scanPos = firstSigPos; scanPos <= minSubPos; scanPos++ ) + { + TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs ); + + cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 2 : 0)); + if (tcoeff < 0) + { + tcoeff = -tcoeff; + } + if( tcoeff >= cutoffVal ) + { + int rice = cctx.templateAbsSumTS( scanPos, coeff ); + int rem = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos ); + tcoeff += (scanPos <= lastScanPosPass1) ? 
(rem << 1) : rem; + if (tcoeff && scanPos > lastScanPosPass1) + { + int blkPos = cctx.blockPos(scanPos); + int sign = m_BinDecoder.decodeBinEP(); + signPattern += (sign << numNonZero); + sigBlkPos[numNonZero++] = blkPos; + } + } + if (!cctx.bdpcm() && cutoffVal) + { + if (tcoeff > 0) + { + int rightPixel, belowPixel; + cctx.neighTS(rightPixel, belowPixel, scanPos, coeff); + tcoeff = cctx.decDeriveModCoeff(rightPixel, belowPixel, tcoeff); + } + } + } + + //===== set final coefficents ===== + for( unsigned k = 0; k < numNonZero; k++ ) + { + int AbsCoeff = coeff[ sigBlkPos[ k ] ]; + coeff[ sigBlkPos[k] ] = ( signPattern & 1 ? -AbsCoeff : AbsCoeff ); + signPattern >>= 1; + } } + //================================================================================ // clause 7.3.8.12 //-------------------------------------------------------------------------------- @@ -2839,3 +3502,64 @@ unsigned CABACReader::exp_golomb_eqprob( unsigned count ) return symbol; } +unsigned CABACReader::code_unary_fixed( unsigned ctxId, unsigned unary_max, unsigned fixed ) +{ + unsigned idx; + bool unary = m_BinDecoder.decodeBin( ctxId ); + if( unary ) + { + idx = unary_max_eqprob( unary_max ); + } + else + { + idx = unary_max + 1 + m_BinDecoder.decodeBinsEP( fixed ); + } + return idx; +} + +void CABACReader::mip_flag( CodingUnit& cu ) +{ + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER ); + + if( !cu.Y().valid() ) + { + return; + } + if( !cu.cs->sps->getUseMIP() ) + { + cu.mipFlag = false; + return; + } + + + unsigned ctxId = DeriveCtx::CtxMipFlag( cu ); + cu.mipFlag = m_BinDecoder.decodeBin( Ctx::MipFlag( ctxId ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "mip_flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.mipFlag ? 
1 : 0 ); +} + +void CABACReader::mip_pred_modes( CodingUnit &cu ) +{ + RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER ); + + if( !cu.Y().valid() ) + { + return; + } + for( auto &pu : CU::traversePUs( cu ) ) + { + mip_pred_mode( pu ); + } +} + +void CABACReader::mip_pred_mode( PredictionUnit &pu ) +{ + pu.mipTransposedFlag = bool(m_BinDecoder.decodeBinEP()); + + uint32_t mipMode; + const int numModes = getNumModesMip( pu.Y() ); + xReadTruncBinCode( mipMode, numModes ); + pu.intraDir[CHANNEL_TYPE_LUMA] = mipMode; + CHECKD( pu.intraDir[CHANNEL_TYPE_LUMA] < 0 || pu.intraDir[CHANNEL_TYPE_LUMA] >= numModes, "Invalid MIP mode" ); + + DTRACE( g_trace_ctx, D_SYNTAX, "mip_pred_mode() pos=(%d,%d) mode=%d transposed=%d\n", pu.lumaPos().x, pu.lumaPos().y, pu.intraDir[CHANNEL_TYPE_LUMA], pu.mipTransposedFlag ? 1 : 0 ); +} diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 34559ae1cef1c92edd6119465c3f0ae0d20c8ffe..132c50232a229395cbcde5d257acb71b7f99eec0 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ class CABACReader { public: - CABACReader(BinDecoderBase& binDecoder) : shareStateDec(0), m_BinDecoder(binDecoder), m_Bitstream(0) {} + CABACReader(BinDecoderBase& binDecoder) : m_BinDecoder(binDecoder), m_Bitstream(0) {} virtual ~CABACReader() {} public: @@ -63,23 +63,25 @@ public: void remaining_bytes ( bool noTrailingBytesExpected ); // coding tree unit (clause 7.3.8.2) - bool coding_tree_unit ( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr ); + void coding_tree_unit ( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr ); // sao (clause 7.3.8.3) void sao ( CodingStructure& cs, unsigned ctuRsAddr ); + void readAlfCtuFilterIndex(CodingStructure& cs, unsigned ctuRsAddr); + // coding (quad)tree (clause 7.3.8.4) - bool coding_tree ( CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr); + void coding_tree ( CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr); PartSplit split_cu_mode ( CodingStructure& cs, Partitioner& pm ); + ModeType mode_constraint ( CodingStructure& cs, Partitioner& pm, const PartSplit splitMode ); // coding unit (clause 7.3.8.5) - bool coding_unit ( CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); - void cu_transquant_bypass_flag ( CodingUnit& cu ); + void coding_unit ( CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); void cu_skip_flag ( CodingUnit& cu ); void pred_mode ( CodingUnit& cu ); - void pcm_flag ( CodingUnit& cu, Partitioner& pm ); + void bdpcm_mode ( CodingUnit& cu, const ComponentID compID ); void cu_pred_data ( CodingUnit& cu ); - void cu_gbi_flag ( CodingUnit& cu ); + void cu_bcw_flag ( CodingUnit& cu ); void extend_ref_line (CodingUnit& cu); void intra_luma_pred_modes ( CodingUnit& cu ); void intra_chroma_pred_modes ( CodingUnit& cu ); @@ -87,9 
+89,14 @@ public: void intra_chroma_pred_mode ( PredictionUnit& pu ); void cu_residual ( CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); void rqt_root_cbf ( CodingUnit& cu ); + void adaptive_color_transform(CodingUnit& cu); void sbt_mode ( CodingUnit& cu ); - bool end_of_ctu ( CodingUnit& cu, CUCtx& cuCtx ); - + void end_of_ctu ( CodingUnit& cu, CUCtx& cuCtx ); + void mip_flag ( CodingUnit& cu ); + void mip_pred_modes ( CodingUnit& cu ); + void mip_pred_mode ( PredictionUnit& pu ); + void cu_palette_info ( CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx ); + void cuPaletteSubblockInfo ( CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType ); // prediction unit (clause 7.3.8.6) void prediction_unit ( PredictionUnit& pu, MergeCtx& mrgCtx ); void merge_flag ( PredictionUnit& pu ); @@ -103,33 +110,34 @@ public: void inter_pred_idc ( PredictionUnit& pu ); void ref_idx ( PredictionUnit& pu, RefPicList eRefList ); void mvp_flag ( PredictionUnit& pu, RefPicList eRefList ); - void MHIntra_flag ( PredictionUnit& pu ); - void MHIntra_luma_pred_modes ( CodingUnit& cu ); - void triangle_mode ( CodingUnit& cu ); + void Ciip_flag ( PredictionUnit& pu ); void smvd_mode ( PredictionUnit& pu ); - // pcm samples (clause 7.3.8.7) - void pcm_samples ( TransformUnit& tu ); // transform tree (clause 7.3.8.8) - void transform_tree ( CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 ); - bool cbf_comp ( CodingStructure& cs, const CompArea& area, unsigned depth, const bool prevCbCbf = false, const bool useISP = false ); + void transform_tree ( CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 ); + bool cbf_comp ( CodingStructure& cs, const CompArea& area, unsigned depth, const bool prevCbf = false, const bool useISP = false ); // mvd coding (clause 7.3.8.9) 
void mvd_coding ( Mv &rMvd ); // transform unit (clause 7.3.8.10) - void transform_unit ( TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ); + void transform_unit ( TransformUnit& tu, CUCtx& cuCtx, Partitioner& pm, const int subTuCounter = -1 ); void cu_qp_delta ( CodingUnit& cu, int predQP, int8_t& qp ); void cu_chroma_qp_offset ( CodingUnit& cu ); // residual coding (clause 7.3.8.11) - void residual_coding ( TransformUnit& tu, ComponentID compID ); - void mts_coding ( TransformUnit& tu, ComponentID compID ); + void residual_coding ( TransformUnit& tu, ComponentID compID, CUCtx& cuCtx ); + void ts_flag ( TransformUnit& tu, ComponentID compID ); + void mts_idx ( CodingUnit& cu, CUCtx& cuCtx ); + void residual_lfnst_mode ( CodingUnit& cu, CUCtx& cuCtx ); void isp_mode ( CodingUnit& cu ); void explicit_rdpcm_mode ( TransformUnit& tu, ComponentID compID ); int last_sig_coeff ( CoeffCodingContext& cctx, TransformUnit& tu, ComponentID compID ); void residual_coding_subblock ( CoeffCodingContext& cctx, TCoeff* coeff, const int stateTransTable, int& state ); + void residual_codingTS ( TransformUnit& tu, ComponentID compID ); + void residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* coeff ); + void joint_cb_cr ( TransformUnit& tu, const int cbfMask ); // cross component prediction (clause 7.3.8.12) void cross_comp_pred ( TransformUnit& tu, ComponentID compID ); @@ -139,15 +147,17 @@ private: unsigned unary_max_eqprob ( unsigned maxSymbol ); unsigned exp_golomb_eqprob ( unsigned count ); unsigned get_num_bits_read () { return m_BinDecoder.getNumBitsRead(); } + unsigned code_unary_fixed ( unsigned ctxId, unsigned unary_max, unsigned fixed ); void xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol); + void parseScanRotationModeFlag ( CodingUnit& cu, ComponentID compBegin ); + void xDecodePLTPredIndicator ( CodingUnit& cu, uint32_t maxPLTSize, ComponentID compBegin ); + void xAdjustPLTIndex ( CodingUnit& cu, Pel curLevel, uint32_t idx, PelBuf& 
paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin ); public: - int shareStateDec; - Position shareParentPos; - Size shareParentSize; private: BinDecoderBase& m_BinDecoder; InputBitstream* m_Bitstream; + ScanElement* m_scanOrder; }; diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 04b10286a175f9b0967cbd2e742ee4cb0414fc6e..3b96128195c8623b5fff067cdcecb0486b61cb18 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,12 +103,12 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) { const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree( cs ) ? 2 : 1; - if (!cs.pcv->isEncoder) + + if (cs.resetIBCBuffer) { - m_shareStateDec = NO_SHARE; + m_pcInterPred->resetIBCBuffer(cs.pcv->chrFormat, cs.slice->getSPS()->getMaxCUHeight()); + cs.resetIBCBuffer = false; } - bool sharePrepareCondition = ((!cs.pcv->isEncoder) && (!(cs.slice->isIntra()) || cs.slice->getSPS()->getIBCFlag())); - for( int ch = 0; ch < maxNumChannelType; ch++ ) { const ChannelType chType = ChannelType( ch ); @@ -117,23 +117,15 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) ) { - if(sharePrepareCondition) + if(currCU.Y().valid()) { - if ((currCU.shareParentPos.x >= 0) && (!(currCU.shareParentPos.x == prevTmpPos.x && currCU.shareParentPos.y == prevTmpPos.y))) - { - m_shareStateDec = GEN_ON_SHARED_BOUND; - cs.motionLut.lutShare = cs.motionLut.lut; - cs.motionLut.lutShareIbc = cs.motionLut.lutIbc; - } - - if (currCU.shareParentPos.x < 0) + const int vSize = 
cs.slice->getSPS()->getMaxCUHeight() > 64 ? 64 : cs.slice->getSPS()->getMaxCUHeight(); + if((currCU.Y().x % vSize) == 0 && (currCU.Y().y % vSize) == 0) { - m_shareStateDec = 0; + m_pcInterPred->resetVPDUforIBC(cs.pcv->chrFormat, cs.slice->getSPS()->getMaxCUHeight(), vSize, currCU.Y().x + g_IBCBufferSize / cs.slice->getSPS()->getMaxCUHeight() / 2, currCU.Y().y); } - prevTmpPos = currCU.shareParentPos; } - cs.chType = chType; - if (currCU.predMode != MODE_INTRA && currCU.Y().valid()) + if (currCU.predMode != MODE_INTRA && currCU.predMode != MODE_PLT && currCU.Y().valid()) { xDeriveCUMV(currCU); } @@ -143,6 +135,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) case MODE_IBC: xReconInter( currCU ); break; + case MODE_PLT: case MODE_INTRA: xReconIntraQT( currCU ); break; @@ -151,10 +144,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) break; } - if( CU::isLosslessCoded( currCU ) && !currCU.ipcm ) - { - xFillPCMBuffer( currCU ); - } + m_pcInterPred->xFillIBCBuffer(currCU); DTRACE_BLOCK_REC( cs.picture->getRecoBuf( currCU ), currCU, currCU.predMode ); } @@ -184,11 +174,31 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) const PredictionUnit &pu = *tu.cs->getPU( area.pos(), chType ); const uint32_t uiChFinalMode = PU::getFinalIntraMode( pu, chType ); + PelBuf pReco = cs.getRecoBuf(area); //===== init availability pattern ===== - - const bool bUseFilteredPredictions = IntraPrediction::useFilteredIntraRefSamples( compID, pu, true, tu ); - m_pcIntraPred->initIntraPatternChType( *tu.cu, area, bUseFilteredPredictions ); + bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu, compID); + bool firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, compID, area); + CompArea areaPredReg(COMPONENT_Y, tu.chromaFormat, area); + if (tu.cu->ispMode && isLuma(compID)) + { + if (predRegDiffFromTB) + { + if (firstTBInPredReg) + { + CU::adjustPredArea(areaPredReg); + 
m_pcIntraPred->initIntraPatternChTypeISP(*tu.cu, areaPredReg, pReco); + } + } + else + { + m_pcIntraPred->initIntraPatternChTypeISP(*tu.cu, area, pReco); + } + } + else + { + m_pcIntraPred->initIntraPatternChType(*tu.cu, area); + } //===== get prediction signal ===== if( compID != COMPONENT_Y && PU::isLMCMode( uiChFinalMode ) ) @@ -199,18 +209,32 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) } else { - m_pcIntraPred->predIntraAng( compID, piPred, pu, bUseFilteredPredictions ); + if( PU::isMIP( pu, chType ) ) + { + m_pcIntraPred->initIntraMip( pu, area ); + m_pcIntraPred->predIntraMip( compID, piPred, pu ); + } + else + { + if (predRegDiffFromTB) + { + if (firstTBInPredReg) + { + PelBuf piPredReg = cs.getPredBuf(areaPredReg); + m_pcIntraPred->predIntraAng(compID, piPredReg, pu); + } + } + else + m_pcIntraPred->predIntraAng(compID, piPred, pu); + } } const Slice &slice = *cs.slice; - bool flag = slice.getReshapeInfo().getUseSliceReshaper() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); - if (flag && slice.getReshapeInfo().getSliceReshapeChromaAdj() && (compID != COMPONENT_Y)) + bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); + if (flag && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (compID != COMPONENT_Y) && (tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr])) { const Area area = tu.Y().valid() ? 
tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size())); const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area); - PelBuf piPredY; - piPredY = cs.picture->getPredBuf(areaY); - const Pel avgLuma = piPredY.computeAvg(); - int adj = m_pcReshape->calculateChromaAdj(avgLuma); + int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY); tu.setChromaAdj(adj); } //===== inverse transform ===== @@ -218,6 +242,24 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) const QpParam cQP( tu, compID ); + if( tu.jointCbCr && isChroma(compID) ) + { + if( compID == COMPONENT_Cb ) + { + PelBuf resiCr = cs.getResiBuf( tu.blocks[ COMPONENT_Cr ] ); + if( tu.jointCbCr >> 1 ) + { + m_pcTrQuant->invTransformNxN( tu, COMPONENT_Cb, piResi, cQP ); + } + else + { + const QpParam qpCr( tu, COMPONENT_Cr ); + m_pcTrQuant->invTransformNxN( tu, COMPONENT_Cr, resiCr, qpCr ); + } + m_pcTrQuant->invTransformICT( tu, piResi, resiCr ); + } + } + else if( TU::getCbf( tu, compID ) ) { m_pcTrQuant->invTransformNxN( tu, compID, piResi, cQP ); @@ -229,7 +271,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) //===== reconstruction ===== flag = flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); - if (flag && TU::getCbf(tu, compID) && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj()) + if (flag && (TU::getCbf(tu, compID) || tu.jointCbCr) && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()) { piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); } @@ -238,7 +280,6 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) CrossComponentPrediction::crossComponentPrediction( tu, compID, cs.getResiBuf( tu.Y() ), piResi, piResi, true ); } - PelBuf pReco = cs.getRecoBuf( area ); if( !tu.cu->ispMode || !isLuma( compID ) ) { 
@@ -253,7 +294,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); PelBuf tmpPred; #endif - if (slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) + if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) { #if REUSE_CU_RESULTS { @@ -270,7 +311,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) #if !KEEP_PRED_AND_RESI_SIGNALS pReco.copyFrom( piPred ); #endif - if (slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) + if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) { #if REUSE_CU_RESULTS { @@ -287,72 +328,248 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID ) #endif } -void DecCu::xReconIntraQT( CodingUnit &cu ) +void DecCu::xIntraRecACTBlk(TransformUnit& tu) { - if( cu.ipcm ) + CodingStructure &cs = *tu.cs; + const PredictionUnit &pu = *tu.cs->getPU(tu.blocks[COMPONENT_Y], CHANNEL_TYPE_LUMA); + const Slice &slice = *cs.slice; + + CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU"); + CHECK(&pu != tu.cu->firstPU, "wrong PU fetch"); + CHECK(tu.cu->ispMode, "adaptive color transform cannot be applied to ISP"); + CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform"); + + bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); + if (flag && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr])) { - xReconPCM( *cu.firstTU ); - return; + const Area area = tu.Y().valid() ? 
tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size())); + const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area); + int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY); + tu.setChromaAdj(adj); } - const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat ); + for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++) + { + ComponentID compID = (ComponentID)i; + const CompArea &area = tu.blocks[compID]; + const ChannelType chType = toChannelType(compID); - for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ ) + PelBuf piPred = cs.getPredBuf(area); + m_pcIntraPred->initIntraPatternChType(*tu.cu, area); + if (PU::isMIP(pu, chType)) + { + m_pcIntraPred->initIntraMip(pu, area); + m_pcIntraPred->predIntraMip(compID, piPred, pu); + } + else + { + m_pcIntraPred->predIntraAng(compID, piPred, pu); + } + + PelBuf piResi = cs.getResiBuf(area); + + QpParam cQP(tu, compID); + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? 
DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6; + cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6; + } + + if (tu.jointCbCr && isChroma(compID)) + { + if (compID == COMPONENT_Cb) + { + PelBuf resiCr = cs.getResiBuf(tu.blocks[COMPONENT_Cr]); + if (tu.jointCbCr >> 1) + { + m_pcTrQuant->invTransformNxN(tu, COMPONENT_Cb, piResi, cQP); + } + else + { + QpParam qpCr(tu, COMPONENT_Cr); + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + qpCr.Qps[qpIdx] = qpCr.Qps[qpIdx] + DELTA_QP_FOR_Co; + qpCr.pers[qpIdx] = qpCr.Qps[qpIdx] / 6; + qpCr.rems[qpIdx] = qpCr.Qps[qpIdx] % 6; + } + + m_pcTrQuant->invTransformNxN(tu, COMPONENT_Cr, resiCr, qpCr); + } + m_pcTrQuant->invTransformICT(tu, piResi, resiCr); + } + } + else + { + if (TU::getCbf(tu, compID)) + { + m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); + } + else + { + piResi.fill(0); + } + } + + flag = flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); + if (flag && (TU::getCbf(tu, compID) || tu.jointCbCr) && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()) + { + piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); + } + + cs.setDecomp(area); + } + + cs.getResiBuf(tu).colorSpaceConvert(cs.getResiBuf(tu), false); + + for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++) { - if( cu.blocks[chType].valid() ) + ComponentID compID = (ComponentID)i; + const CompArea &area = tu.blocks[compID]; + + PelBuf piPred = cs.getPredBuf(area); + PelBuf piResi = cs.getResiBuf(area); + PelBuf piReco = cs.getRecoBuf(area); + + PelBuf tmpPred; + if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) { - xIntraRecQT( cu, ChannelType( chType ) ); + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + tmpPred = m_tmpStorageLCU->getBuf(tmpArea); + tmpPred.copyFrom(piPred); + } + + piPred.reconstruct(piPred, piResi, tu.cu->cs->slice->clpRng(compID)); + 
piReco.copyFrom(piPred); + + if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y) + { + piPred.copyFrom(tmpPred); + } + + if (cs.pcv->isEncoder) + { + cs.picture->getRecoBuf(area).copyFrom(piReco); + cs.picture->getPredBuf(area).copyFrom(piPred); } } } -/** Function for deriving reconstructed luma/chroma samples of a PCM mode CU. -* \param pcCU pointer to current CU -* \param uiPartIdx part index -* \param piPCM pointer to PCM code arrays -* \param piReco pointer to reconstructed sample arrays -* \param uiStride stride of reconstructed sample arrays -* \param uiWidth CU width -* \param uiHeight CU height -* \param compID colour component ID -* \returns void -*/ -void DecCu::xDecodePCMTexture(TransformUnit &tu, const ComponentID compID) +void DecCu::xReconIntraQT( CodingUnit &cu ) { - const CompArea &area = tu.blocks[compID]; - PelBuf piPicReco = tu.cs->getRecoBuf( area ); - const CPelBuf piPicPcm = tu.getPcmbuf(compID); - const SPS &sps = *tu.cs->sps; - const uint32_t uiPcmLeftShiftBit = sps.getBitDepth(toChannelType(compID)) - sps.getPCMBitDepth(toChannelType(compID)); - for (uint32_t uiY = 0; uiY < area.height; uiY++) + if (CU::isPLT(cu)) { - for (uint32_t uiX = 0; uiX < area.width; uiX++) + if (cu.isSepTree()) + { + if (cu.chType == CHANNEL_TYPE_LUMA) + { + xReconPLT(cu, COMPONENT_Y, 1); + } + if (cu.chromaFormat != CHROMA_400 && (cu.chType == CHANNEL_TYPE_CHROMA)) + { + xReconPLT(cu, COMPONENT_Cb, 2); + } + } + else { - piPicReco.at(uiX, uiY) = (piPicPcm.at(uiX, uiY) << uiPcmLeftShiftBit); + xReconPLT(cu, COMPONENT_Y, 3); } + return; + } + + if (cu.colorTransform) + { + xIntraRecACTQT(cu); } + else + { + const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat ); - tu.cs->picture->getRecoBuf( area ).copyFrom( piPicReco ); - tu.cs->setDecomp( area ); + for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ ) + { + if( cu.blocks[chType].valid() ) + { + xIntraRecQT( 
cu, ChannelType( chType ) ); + } + } + } } -/** Function for reconstructing a PCM mode CU. -* \param pcCU pointer to current CU -* \param uiDepth CU Depth -* \returns void -*/ -void DecCu::xReconPCM(TransformUnit &tu) +void DecCu::xReconPLT(CodingUnit &cu, ComponentID compBegin, uint32_t numComp) { - const CodingStructure *cs = tu.cs; - const ChannelType chType = tu.chType; + const SPS& sps = *(cu.cs->sps); + TransformUnit& tu = *cu.firstTU; + PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin); - ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y; - ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? COMPONENT_Y : COMPONENT_Cr; - for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) ) + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; + + //recon. pixels + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc()); + for (uint32_t y = 0; y < height; y++) { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++) + { + const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)compID)); + const CompArea &area = cu.blocks[compID]; + + PelBuf picReco = cu.cs->getRecoBuf(area); + PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)compID); + if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin]) + { + Pel value; + QpParam cQP(tu, (ComponentID)compID); + int qp = cQP.Qp(true); + int qpRem = qp % 6; + int qpPer = qp / 6; + if (compBegin != COMPONENT_Y || compID == COMPONENT_Y) + { + int invquantiserRightShift = IQUANT_SHIFT; + int add = 1 << (invquantiserRightShift - 1); + value = ((((escapeValue.at(x, y)*g_invQuantScales[0][qpRem]) << qpPer) + add) >> invquantiserRightShift); + value = Pel(ClipBD<int>(value, channelBitDepth)); + picReco.at(x, y) = value; + } + else if 
(compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0) + { + uint32_t posYC = y >> scaleY; + uint32_t posXC = x >> scaleX; + int invquantiserRightShift = IQUANT_SHIFT; + int add = 1 << (invquantiserRightShift - 1); + value = ((((escapeValue.at(posXC, posYC)*g_invQuantScales[0][qpRem]) << qpPer) + add) >> invquantiserRightShift); + value = Pel(ClipBD<int>(value, channelBitDepth)); + picReco.at(posXC, posYC) = value; - xDecodePCMTexture(tu, compID); + } + } + else + { + uint32_t curIdx = curPLTIdx.at(x, y); + if (compBegin != COMPONENT_Y || compID == COMPONENT_Y) + { + picReco.at(x, y) = cu.curPLT[compID][curIdx]; + } + else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0) + { + uint32_t posYC = y >> scaleY; + uint32_t posXC = x >> scaleX; + picReco.at(posXC, posYC) = cu.curPLT[compID][curIdx]; + } + } + } + } + } + for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++) + { + const CompArea &area = cu.blocks[compID]; + PelBuf picReco = cu.cs->getRecoBuf(area); + cu.cs->picture->getRecoBuf(area).copyFrom(picReco); + cu.cs->setDecomp(area); } } @@ -387,6 +604,14 @@ DecCu::xIntraRecQT(CodingUnit &cu, const ChannelType chType) } } +void DecCu::xIntraRecACTQT(CodingUnit &cu) +{ + for (auto &currTU : CU::traverseTUs(cu)) + { + xIntraRecACTBlk(currTU); + } +} + /** Function for filling the PCM buffer of a CU using its reconstructed sample array * \param pCU pointer to current CU * \param depth CU Depth @@ -424,7 +649,7 @@ void DecCu::xReconInter(CodingUnit &cu) m_pcIntraPred->geneIntrainterPred(cu); // inter prediction - CHECK(CU::isIBC(cu) && cu.firstPU->mhIntraFlag, "IBC and MHIntra cannot be used together"); + CHECK(CU::isIBC(cu) && cu.firstPU->ciipFlag, "IBC and Ciip cannot be used together"); CHECK(CU::isIBC(cu) && cu.affine, "IBC and Affine cannot be used together"); CHECK(CU::isIBC(cu) && cu.triangle, "IBC and triangle cannot be used 
together"); CHECK(CU::isIBC(cu) && cu.firstPU->mmvdMergeFlag, "IBC and MMVD cannot be used together"); @@ -441,24 +666,22 @@ void DecCu::xReconInter(CodingUnit &cu) } if (cu.Y().valid()) { - const PredictionUnit &pu = *cu.firstPU; - if (!cu.affine && !cu.triangle) - { - MotionInfo mi = pu.getMotionInfo(); - mi.GBiIdx = (mi.interDir == 3) ? cu.GBiIdx : GBI_DEFAULT; - cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi ); - } + bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16); + CU::saveMotionInHMVP( cu, isIbcSmallBlk ); } - if (cu.firstPU->mhIntraFlag) + if (cu.firstPU->ciipFlag) { - if (cu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { cu.cs->getPredBuf(*cu.firstPU).Y().rspSignal(m_pcReshape->getFwdLUT()); } m_pcIntraPred->geneWeightedPred(COMPONENT_Y, cu.cs->getPredBuf(*cu.firstPU).Y(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Y, 0)); - m_pcIntraPred->geneWeightedPred(COMPONENT_Cb, cu.cs->getPredBuf(*cu.firstPU).Cb(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cb, 0)); - m_pcIntraPred->geneWeightedPred(COMPONENT_Cr, cu.cs->getPredBuf(*cu.firstPU).Cr(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cr, 0)); + if (cu.chromaSize().width > 2) + { + m_pcIntraPred->geneWeightedPred(COMPONENT_Cb, cu.cs->getPredBuf(*cu.firstPU).Cb(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cb, 0)); + m_pcIntraPred->geneWeightedPred(COMPONENT_Cr, cu.cs->getPredBuf(*cu.firstPU).Cr(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cr, 0)); + } } DTRACE ( g_trace_ctx, D_TMP, "pred " ); @@ -472,12 +695,16 @@ void DecCu::xReconInter(CodingUnit &cu) if (cu.rootCbf) { + if (cu.colorTransform) + { + cs.getResiBuf(cu).colorSpaceConvert(cs.getResiBuf(cu), false); + } #if REUSE_CU_RESULTS const CompArea &area = cu.blocks[COMPONENT_Y]; CompArea tmpArea(COMPONENT_Y, 
area.chromaFormat, Position(0, 0), area.size()); PelBuf tmpPred; #endif - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { #if REUSE_CU_RESULTS if (cs.pcv->isEncoder) @@ -486,7 +713,7 @@ void DecCu::xReconInter(CodingUnit &cu) tmpPred.copyFrom(cs.getPredBuf(cu).get(COMPONENT_Y)); } #endif - if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (!cu.firstPU->ciipFlag && !CU::isIBC(cu)) cs.getPredBuf(cu).get(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT()); } #if KEEP_PRED_AND_RESI_SIGNALS @@ -495,7 +722,7 @@ void DecCu::xReconInter(CodingUnit &cu) cs.getResiBuf( cu ).reconstruct( cs.getPredBuf( cu ), cs.getResiBuf( cu ), cs.slice->clpRngs() ); cs.getRecoBuf( cu ).copyFrom ( cs.getResiBuf( cu ) ); #endif - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { #if REUSE_CU_RESULTS if (cs.pcv->isEncoder) @@ -508,7 +735,7 @@ void DecCu::xReconInter(CodingUnit &cu) else { cs.getRecoBuf(cu).copyClip(cs.getPredBuf(cu), cs.slice->clpRngs()); - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && !cu.firstPU->ciipFlag && !CU::isIBC(cu)) { cs.getRecoBuf(cu).get(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT()); } @@ -531,8 +758,44 @@ void DecCu::xDecodeInterTU( TransformUnit & currTU, const ComponentID compID ) //===== inverse transform ===== PelBuf resiBuf = cs.getResiBuf(area); - const QpParam cQP(currTU, compID); + QpParam cQP(currTU, compID); + if (currTU.cu->colorTransform) + { + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? 
DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6; + cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6; + } + } + if( currTU.jointCbCr && isChroma(compID) ) + { + if( compID == COMPONENT_Cb ) + { + PelBuf resiCr = cs.getResiBuf( currTU.blocks[ COMPONENT_Cr ] ); + if( currTU.jointCbCr >> 1 ) + { + m_pcTrQuant->invTransformNxN( currTU, COMPONENT_Cb, resiBuf, cQP ); + } + else + { + QpParam qpCr(currTU, COMPONENT_Cr); + if (currTU.cu->colorTransform) + { + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + qpCr.Qps[qpIdx] = qpCr.Qps[qpIdx] + DELTA_QP_FOR_Co; + qpCr.pers[qpIdx] = qpCr.Qps[qpIdx] / 6; + qpCr.rems[qpIdx] = qpCr.Qps[qpIdx] % 6; + } + } + m_pcTrQuant->invTransformNxN( currTU, COMPONENT_Cr, resiCr, qpCr ); + } + m_pcTrQuant->invTransformICT( currTU, resiBuf, resiCr ); + } + } + else if( TU::getCbf( currTU, compID ) ) { m_pcTrQuant->invTransformNxN( currTU, compID, resiBuf, cQP ); @@ -544,7 +807,8 @@ void DecCu::xDecodeInterTU( TransformUnit & currTU, const ComponentID compID ) //===== reconstruction ===== const Slice &slice = *cs.slice; - if ( slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && TU::getCbf(currTU, compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && currTU.blocks[compID].width*currTU.blocks[compID].height > 4 ) + if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && (TU::getCbf(currTU, compID) || currTU.jointCbCr) + && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && currTU.blocks[compID].width * currTU.blocks[compID].height > 4) { resiBuf.scaleSignal(currTU.getChromaAdj(), 0, currTU.cu->cs->slice->clpRng(compID)); } @@ -571,17 +835,10 @@ void DecCu::xDecodeInterTexture(CodingUnit &cu) { CodingStructure &cs = *cu.cs; const Slice &slice = *cs.slice; - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj() && (compID == COMPONENT_Y)) + if 
(slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (compID == COMPONENT_Y) && (currTU.cbf[COMPONENT_Cb] || currTU.cbf[COMPONENT_Cr])) { const CompArea &areaY = currTU.blocks[COMPONENT_Y]; - PelBuf predY = cs.getPredBuf(areaY); - CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); - PelBuf tmpPred = m_tmpStorageLCU->getBuf(tmpArea); - tmpPred.copyFrom(predY); - if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) - tmpPred.rspSignal(m_pcReshape->getFwdLUT()); - const Pel avgLuma = tmpPred.computeAvg(); - int adj = m_pcReshape->calculateChromaAdj(avgLuma); + int adj = m_pcReshape->calculateChromaAdjVpduNei(currTU, areaY); currTU.setChromaAdj(adj); } xDecodeInterTU( currTU, compID ); @@ -607,7 +864,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) { if (pu.mmvdMergeFlag || pu.cu->mmvdSkip) { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra"); + CHECK(pu.ciipFlag == true, "invalid Ciip"); if (pu.cs->sps->getSBTMVPEnabledFlag()) { Size bufSize = g_miScaling.scale(pu.lumaSize()); @@ -615,8 +872,6 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) } int fPosBaseIdx = pu.mmvdMergeIdx / MMVD_MAX_REFINE_NUM; - pu.shareParentPos = cu.shareParentPos; - pu.shareParentSize = cu.shareParentSize; PU::getInterMergeCandidates(pu, mrgCtx, 1, fPosBaseIdx + 1); PU::getInterMMVDMergeCandidates(pu, mrgCtx, pu.mmvdMergeIdx @@ -646,7 +901,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) PU::getAffineMergeCand( pu, affineMergeCtx, pu.mergeIdx ); pu.interDir = affineMergeCtx.interDirNeighbours[pu.mergeIdx]; pu.cu->affineType = affineMergeCtx.affineType[pu.mergeIdx]; - pu.cu->GBiIdx = affineMergeCtx.GBiIdx[pu.mergeIdx]; + pu.cu->BcwIdx = affineMergeCtx.BcwIdx[pu.mergeIdx]; pu.mergeType = affineMergeCtx.mergeType[pu.mergeIdx]; if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP ) { @@ -671,8 +926,6 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) } else { - pu.shareParentPos = cu.shareParentPos; - pu.shareParentSize = cu.shareParentSize; if 
(CU::isIBC(*pu.cu)) PU::getIBCMergeCandidates(pu, mrgCtx, pu.mergeIdx); else @@ -713,31 +966,27 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) // Mv mv[3]; CHECK( pu.refIdx[eRefList] < 0, "Unexpected negative refIdx." ); - const int imvShift = ( !cu.cs->pcv->isEncoder && pu.cu->imv == 2 ) ? MV_FRACTIONAL_BITS_DIFF : 0; - pu.mvdAffi[eRefList][0] <<= imvShift; - pu.mvdAffi[eRefList][1] <<= imvShift; + if (!cu.cs->pcv->isEncoder) + { + pu.mvdAffi[eRefList][0].changeAffinePrecAmvr2Internal(pu.cu->imv); + pu.mvdAffi[eRefList][1].changeAffinePrecAmvr2Internal(pu.cu->imv); + if (cu.affineType == AFFINEMODEL_6PARAM) + { + pu.mvdAffi[eRefList][2].changeAffinePrecAmvr2Internal(pu.cu->imv); + } + } Mv mvLT = affineAMVPInfo.mvCandLT[mvp_idx] + pu.mvdAffi[eRefList][0]; Mv mvRT = affineAMVPInfo.mvCandRT[mvp_idx] + pu.mvdAffi[eRefList][1]; mvRT += pu.mvdAffi[eRefList][0]; - if ( pu.cu->imv != 1 ) - { - mvLT.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); - mvRT.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); - } Mv mvLB; if ( cu.affineType == AFFINEMODEL_6PARAM ) { - pu.mvdAffi[eRefList][2] <<= imvShift; mvLB = affineAMVPInfo.mvCandLB[mvp_idx] + pu.mvdAffi[eRefList][2]; mvLB += pu.mvdAffi[eRefList][0]; - if ( pu.cu->imv != 1 ) - { - mvLB.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); - } } - PU::setAllAffineMv( pu, mvLT, mvRT, mvLB, eRefList ); + PU::setAllAffineMv(pu, mvLT, mvRT, mvLB, eRefList, true); } } } @@ -750,9 +999,15 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) #if REUSE_CU_RESULTS if (!cu.cs->pcv->isEncoder) #endif - mvd <<= 2; + { + mvd.changeIbcPrecAmvr2Internal(pu.cu->imv); + } + if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 ) + { + CHECK( pu.mvpIdx[REF_PIC_LIST_0], "mvpIdx for IBC mode should be 0" ); + } pu.mv[REF_PIC_LIST_0] = amvpInfo.mvCand[pu.mvpIdx[REF_PIC_LIST_0]] + mvd; - pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + 
pu.mv[REF_PIC_LIST_0].mvCliptoStorageBitDepth(); } else { @@ -764,8 +1019,12 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) AMVPInfo amvpInfo; PU::fillMvpCand(pu, eRefList, pu.refIdx[eRefList], amvpInfo); pu.mvpNum [eRefList] = amvpInfo.numCand; + if (!cu.cs->pcv->isEncoder) + { + pu.mvd[eRefList].changeTransPrecAmvr2Internal(pu.cu->imv); + } pu.mv[eRefList] = amvpInfo.mvCand[pu.mvpIdx[eRefList]] + pu.mvd[eRefList]; - pu.mv[eRefList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + pu.mv[eRefList].mvCliptoStorageBitDepth(); } } } @@ -779,6 +1038,17 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) printf( "DECODER: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() ); } } + if (CU::isIBC(cu)) + { + const int cuPelX = pu.Y().x; + const int cuPelY = pu.Y().y; + int roiWidth = pu.lwidth(); + int roiHeight = pu.lheight(); + const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); + int xPred = pu.mv[0].getHor() >> MV_FRACTIONAL_BITS_INTERNAL; + int yPred = pu.mv[0].getVer() >> MV_FRACTIONAL_BITS_INTERNAL; + CHECK(!m_pcInterPred->isLumaBvValid(lcuWidth, cuPelX, cuPelY, roiWidth, roiHeight, xPred, yPred), "invalid block vector for IBC detected."); + } } } //! \} diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h index 8d70b5f06275284f8d0ba6b91b8726e262c17fbf..2cbb597a9b9a48a09675b02746ef44840e9384df 100644 --- a/source/Lib/DecoderLib/DecCu.h +++ b/source/Lib/DecoderLib/DecCu.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -73,13 +73,10 @@ public: void initDecCuReshaper ( Reshape* pcReshape, ChromaFormat chromaFormatIDC) ; void destoryDecCuReshaprBuf(); - void setShareStateDec (int shareStateDecIn) { m_shareStateDec = shareStateDecIn; } -#if ENABLE_SPLIT_PARALLELISM - int getShareStateDec () const { return m_shareStateDec; } -#endif /// reconstruct Ctu information protected: void xIntraRecQT ( CodingUnit& cu, const ChannelType chType ); + void xIntraRecACTQT(CodingUnit& cu); void xReconInter ( CodingUnit& cu ); void xDecodeInterTexture( CodingUnit& cu ); @@ -87,18 +84,17 @@ protected: void xFillPCMBuffer ( CodingUnit& cu ); void xIntraRecBlk ( TransformUnit& tu, const ComponentID compID ); - void xReconPCM ( TransformUnit& tu); - void xDecodePCMTexture ( TransformUnit& tu, const ComponentID compID ); + void xIntraRecACTBlk(TransformUnit& tu); void xDecodeInterTU ( TransformUnit& tu, const ComponentID compID ); void xDeriveCUMV ( CodingUnit& cu ); + void xReconPLT ( CodingUnit& cu, ComponentID compBegin, uint32_t numComp ); PelStorage *m_tmpStorageLCU; private: TrQuant* m_pcTrQuant; IntraPrediction* m_pcIntraPred; InterPrediction* m_pcInterPred; - int m_shareStateDec; MotionInfo m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> (MIN_CU_LOG2 << 1)]; diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index b3ecdee4e51e98498763894205abc054eb390e97..fdfe5456da4f4617b21e8a18fdbfdb3ca186066d 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -168,6 +168,7 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri pcEncPic->slices.back()->initSlice(); pcEncPic->slices.back()->setPPS( pcEncPic->slices[0]->getPPS() ); pcEncPic->slices.back()->setSPS( pcEncPic->slices[0]->getSPS() ); + pcEncPic->slices.back()->setVPS( pcEncPic->slices[0]->getVPS() ); pcEncPic->slices.back()->setPic( pcEncPic->slices[0]->getPic() ); } pcEncPic->slices[i]->copySliceInfo( pic->slices[i], false ); @@ -201,16 +202,26 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri if( pic->cs->sps->getALFEnabledFlag() ) { + std::copy(pic->getAlfCtbFilterIndexVec().begin(), pic->getAlfCtbFilterIndexVec().end(), pcEncPic->getAlfCtbFilterIndexVec().begin()); for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { std::copy( pic->getAlfCtuEnableFlag()[compIdx].begin(), pic->getAlfCtuEnableFlag()[compIdx].end(), pcEncPic->getAlfCtuEnableFlag()[compIdx].begin() ); } + pcEncPic->resizeAlfCtbFilterIndex(pic->cs->pcv->sizeInCtus); + memcpy( pcEncPic->getAlfCtbFilterIndex(), pic->getAlfCtbFilterIndex(), sizeof(short)*pic->cs->pcv->sizeInCtus ); + + std::copy( pic->getAlfCtuAlternative(COMPONENT_Cb).begin(), pic->getAlfCtuAlternative(COMPONENT_Cb).end(), pcEncPic->getAlfCtuAlternative(COMPONENT_Cb).begin() ); + std::copy( pic->getAlfCtuAlternative(COMPONENT_Cr).begin(), pic->getAlfCtuAlternative(COMPONENT_Cr).end(), pcEncPic->getAlfCtuAlternative(COMPONENT_Cr).begin() ); for( int i = 0; i < pic->slices.size(); i++ ) { - pcEncPic->slices[i]->setAPSId(pic->slices[i]->getAPSId()); - pcEncPic->slices[i]->setAPS( pic->slices[i]->getAPS()); - pcEncPic->slices[i]->setTileGroupAlfEnabledFlag( pic->slices[i]->getTileGroupAlfEnabledFlag()); + pcEncPic->slices[i]->setTileGroupNumAps(pic->slices[i]->getTileGroupNumAps()); + pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getTileGroupApsIdLuma()); + 
pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getAlfAPSs()); + pcEncPic->slices[i]->setTileGroupApsIdChroma(pic->slices[i]->getTileGroupApsIdChroma()); + pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Y, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Y)); + pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cb)); + pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cr)); } } @@ -375,6 +386,7 @@ DecLib::DecLib() , m_cInterPred() , m_cTrQuant() , m_cSliceDecoder() + , m_cTrQuantScalingList() , m_cCuDecoder() , m_HLSReader() , m_seiReader() @@ -385,6 +397,7 @@ DecLib::DecLib() , m_cacheModel() #endif , m_pcPic(NULL) + , m_prevLayerID(MAX_INT) , m_prevPOC(MAX_INT) , m_prevTid0POC(0) , m_bFirstSliceInPicture(true) @@ -394,7 +407,8 @@ DecLib::DecLib() , m_bFirstSliceInBitstream(true) , m_lastPOCNoOutputPriorPics(-1) , m_isNoOutputPriorPics(false) - , m_craNoRaslOutputFlag(false) + , m_lastNoIncorrectPicOutputFlag(false) + , m_sliceLmcsApsId(-1) , m_pDecodedSEIOutputStream(NULL) , m_decodedPictureHashSEIEnabled(false) , m_numberOfChecksumErrorsDetected(0) @@ -402,6 +416,9 @@ DecLib::DecLib() , m_prefixSEINALUs() , m_debugPOC( -1 ) , m_debugCTU( -1 ) + , m_vps( nullptr ) + , m_scalingListUpdateFlag(true) + , m_PreScalingListAPSId(-1) { #if ENABLE_SIMD_OPT_BUFFER g_pelBufOP.initPelBufOpsX86(); @@ -415,6 +432,7 @@ DecLib::~DecLib() delete m_prefixSEINALUs.front(); m_prefixSEINALUs.pop_front(); } + } void DecLib::create() @@ -470,7 +488,7 @@ void DecLib::deletePicBuffer ( ) m_cReshaper.destroy(); } -Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32_t temporalLayer ) +Picture* DecLib::xGetNewPicBuffer( const SPS &sps, const PPS &pps, const uint32_t temporalLayer, const int layerId ) { Picture * pcPic = nullptr; m_iMaxRefPicNum = sps.getMaxDecPicBuffering(temporalLayer); // m_uiMaxDecPicBuffering has 
the space for the picture currently being decoded @@ -478,7 +496,7 @@ Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32 { pcPic = new Picture(); - pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true ); + pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId ); m_cListPic.push_back( pcPic ); @@ -514,14 +532,14 @@ Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32 m_cListPic.push_back( pcPic ); - pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true ); + pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId ); } else { - if( !pcPic->Y().Size::operator==( Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ) ) || pcPic->cs->pcv->maxCUWidth != sps.getMaxCUWidth() || pcPic->cs->pcv->maxCUHeight != sps.getMaxCUHeight() ) + if( !pcPic->Y().Size::operator==( Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ) ) || pps.pcv->maxCUWidth != sps.getMaxCUWidth() || pps.pcv->maxCUHeight != sps.getMaxCUHeight() ) { pcPic->destroy(); - pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true ); + pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId ); } } @@ -540,9 +558,11 @@ void DecLib::executeLoopFilters() return; // nothing to deblock } + m_pcPic->cs->slice->startProcessingTimer(); + 
CodingStructure& cs = *m_pcPic->cs; - if (cs.sps->getUseReshaper() && m_cReshaper.getSliceReshaperInfo().getUseSliceReshaper()) + if (cs.sps->getUseLmcs() && m_cReshaper.getSliceReshaperInfo().getUseSliceReshaper()) { CHECK((m_cReshaper.getRecReshaped() == false), "Rec picture is not reshaped!"); m_pcPic->getRecoBuf(COMPONENT_Y).rspSignal(m_cReshaper.getInvLUT()); @@ -559,24 +579,21 @@ void DecLib::executeLoopFilters() if( cs.sps->getALFEnabledFlag() ) { - if (cs.slice->getTileGroupAlfEnabledFlag()) - { // ALF decodes the differentially coded coefficients and stores them in the parameters structure. // Code could be restructured to do directly after parsing. So far we just pass a fresh non-const // copy in case the APS gets used more than once. - - AlfSliceParam alfParamCopy = cs.aps->getAlfAPSParam(); - m_cALF.ALFProcess(cs, alfParamCopy); - } + m_cALF.ALFProcess(cs); } + + m_pcPic->cs->slice->stopProcessingTimer(); } void DecLib::finishPictureLight(int& poc, PicList*& rpcListPic ) { Slice* pcSlice = m_pcPic->cs->slice; - m_pcPic->neededForOutput = (pcSlice->getPicOutputFlag() ? true : false); + m_pcPic->neededForOutput = (pcSlice->getPicHeader()->getPicOutputFlag() ? 
true : false); m_pcPic->reconstructed = true; Slice::sortPicList( m_cListPic ); // sorting for application output @@ -600,8 +617,10 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl ) c += 32; // tolower } + if (pcSlice->isDRAP()) c = 'D'; + //-- For time output for each slice - msg( msgl, "POC %4d TId: %1d ( %c-SLICE, QP%3d ) ", pcSlice->getPOC(), + msg( msgl, "POC %4d LId: %2d TId: %1d ( %c-SLICE, QP%3d ) ", pcSlice->getPOC(), pcSlice->getPic()->layerId, pcSlice->getTLayer(), c, pcSlice->getSliceQp() ); @@ -612,7 +631,29 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl ) msg( msgl, "[L%d ", iRefList); for (int iRefIndex = 0; iRefIndex < pcSlice->getNumRefIdx(RefPicList(iRefList)); iRefIndex++) { - msg( msgl, "%d ", pcSlice->getRefPOC(RefPicList(iRefList), iRefIndex)); + const std::pair<int, int>& scaleRatio = pcSlice->getScalingRatio( RefPicList( iRefList ), iRefIndex ); + + if( pcSlice->getPicHeader()->getEnableTMVPFlag() && pcSlice->getColFromL0Flag() == bool(1 - iRefList) && pcSlice->getColRefIdx() == iRefIndex ) + { + if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS ) + msg( msgl, "%dc(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) ); + else + msg( msgl, "%dc ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) ); + } + else + { + if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS ) + msg( msgl, "%d(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) ); + else + msg( msgl, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) ); + } + + if( pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) == pcSlice->getPOC() ) + { + msg( 
msgl, ".%d", pcSlice->getRefPic( RefPicList( iRefList ), iRefIndex )->layerId ); + } + + msg( msgl, " " ); } msg( msgl, "] "); } @@ -629,7 +670,13 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl ) msg( msgl, "\n"); - m_pcPic->neededForOutput = (pcSlice->getPicOutputFlag() ? true : false); +#if JVET_J0090_MEMORY_BANDWITH_MEASURE + m_cacheModel.reportFrame(); + m_cacheModel.accumulateFrame(); + m_cacheModel.clear(); +#endif + + m_pcPic->neededForOutput = (pcSlice->getPicHeader()->getPicOutputFlag() ? true : false); m_pcPic->reconstructed = true; @@ -641,6 +688,7 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl ) m_pcPic->destroyTempBuffers(); m_pcPic->cs->destroyCoeffs(); m_pcPic->cs->releaseIntermediateData(); + m_pcPic->cs->picHeader->initPicHeader(); } void DecLib::checkNoOutputPriorPics (PicList* pcListPic) @@ -676,10 +724,10 @@ void DecLib::xUpdateRasInit(Slice* slice) } } -void DecLib::xCreateLostPicture(int iLostPoc) +void DecLib::xCreateLostPicture( int iLostPoc, const int layerId ) { msg( INFO, "\ninserting lost poc : %d\n",iLostPoc); - Picture *cFillPic = xGetNewPicBuffer(*(m_parameterSetManager.getFirstSPS()), *(m_parameterSetManager.getFirstPPS()), 0); + Picture *cFillPic = xGetNewPicBuffer( *( m_parameterSetManager.getFirstSPS() ), *( m_parameterSetManager.getFirstPPS() ), 0, layerId ); CHECK( !cFillPic->slices.size(), "No slices in picture" ); @@ -720,42 +768,197 @@ void DecLib::xCreateLostPicture(int iLostPoc) } +void DecLib::xCreateUnavailablePicture(int iUnavailablePoc, bool longTermFlag, const int layerId, const bool interLayerRefPicFlag) +{ + msg(INFO, "\ninserting unavailable poc : %d\n", iUnavailablePoc); + Picture* cFillPic = xGetNewPicBuffer( *( m_parameterSetManager.getFirstSPS() ), *( m_parameterSetManager.getFirstPPS() ), 0, layerId ); + + CHECK(!cFillPic->slices.size(), "No slices in picture"); + + cFillPic->slices[0]->initSlice(); + + uint32_t yFill = 1 << 
(m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1); + uint32_t cFill = 1 << (m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_CHROMA) - 1); + cFillPic->getRecoBuf().Y().fill(yFill); + cFillPic->getRecoBuf().Cb().fill(cFill); + cFillPic->getRecoBuf().Cr().fill(cFill); + + // for(int ctuRsAddr=0; ctuRsAddr<cFillPic->getNumberOfCtusInFrame(); ctuRsAddr++) { cFillPic->getCtu(ctuRsAddr)->initCtu(cFillPic, ctuRsAddr); } + cFillPic->referenced = true; + cFillPic->interLayerRefPicFlag = interLayerRefPicFlag; + cFillPic->longTerm = longTermFlag; + cFillPic->slices[0]->setPOC(iUnavailablePoc); + xUpdatePreviousTid0POC(cFillPic->slices[0]); + cFillPic->reconstructed = true; + cFillPic->neededForOutput = false; + if (m_pocRandomAccess == MAX_INT) + { + m_pocRandomAccess = iUnavailablePoc; + } + +} -void DecLib::xActivateParameterSets() +/** + - Determine if the first VCL NAL unit of a picture is also the first VCL NAL of an Access Unit + */ +bool DecLib::isSliceNaluFirstInAU( bool newPicture, InputNALUnit &nalu ) { - if (m_bFirstSliceInPicture) + // can only be the start of an AU if this is the start of a new picture + if( newPicture == false ) + { + return false; + } + + // should only be called for slice NALU types + if( nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_TRAIL && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_STSA && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_RASL && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_RADL && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_IDR_W_RADL && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_IDR_N_LP && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_CRA && + nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_GDR ) + { + return false; + } + + // check for valid picture header + if(m_picHeader.isValid() == false) + { + return false; + } + + // check for layer ID less than or equal to previous picture's layer ID + if( nalu.m_nuhLayerId <= m_prevLayerID ) + { + return true; + } + + // get slice POC + 
m_apcSlicePilot->setPicHeader( &m_picHeader ); + m_apcSlicePilot->initSlice(); + m_HLSReader.setBitstream( &nalu.getBitstream() ); + m_HLSReader.parseSliceHeaderToPoc( m_apcSlicePilot, &m_picHeader, &m_parameterSetManager, m_prevTid0POC ); + + // check for different POC + return (m_apcSlicePilot->getPOC() != m_prevPOC); +} + +void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& parameterSetManager, APS** apss, APS* lmcsAPS, APS* scalingListAPS) +{ + //luma APSs + if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y)) + { + for (int i = 0; i < pSlice->getTileGroupApsIdLuma().size(); i++) + { + int apsId = pSlice->getTileGroupApsIdLuma()[i]; + APS* aps = parameterSetManager.getAPS(apsId, ALF_APS); + + if (aps) + { + apss[apsId] = aps; + if (false == parameterSetManager.activateAPS(apsId, ALF_APS)) + { + THROW("APS activation failed!"); + } + + CHECK( aps->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" ); + //ToDO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that referrs it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that referrs it. 
+ } + } + } + if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb)||pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) ) + { + //chroma APS + int apsId = pSlice->getTileGroupApsIdChroma(); + APS* aps = parameterSetManager.getAPS(apsId, ALF_APS); + if (aps) + { + apss[apsId] = aps; + if (false == parameterSetManager.activateAPS(apsId, ALF_APS)) + { + THROW("APS activation failed!"); + } + + CHECK( aps->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" ); + //ToDO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that referrs it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that referrs it. + } + } + + if (picHeader->getLmcsEnabledFlag() && lmcsAPS == nullptr) + { + lmcsAPS = parameterSetManager.getAPS(picHeader->getLmcsAPSId(), LMCS_APS); + CHECK(lmcsAPS == nullptr, "No LMCS APS present"); + if (lmcsAPS) + { + parameterSetManager.clearAPSChangedFlag(picHeader->getLmcsAPSId(), LMCS_APS); + if (false == parameterSetManager.activateAPS(picHeader->getLmcsAPSId(), LMCS_APS)) + { + THROW("LMCS APS activation failed!"); + } + + CHECK( lmcsAPS->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" ); + //ToDO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that referrs it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that referrs it. + } + } + picHeader->setLmcsAPS(lmcsAPS); + + if( picHeader->getScalingListPresentFlag() && scalingListAPS == nullptr) { - APS *aps = m_parameterSetManager.getAPS(m_apcSlicePilot->getAPSId()); // this is a temporary APS object. 
Do not store this value - if (m_apcSlicePilot->getAPSId() != -1) + scalingListAPS = parameterSetManager.getAPS( picHeader->getScalingListAPSId(), SCALING_LIST_APS ); + CHECK( scalingListAPS == nullptr, "No SCALING LIST APS present" ); + if( scalingListAPS ) { - CHECK(aps == 0, "No APS present"); + parameterSetManager.clearAPSChangedFlag( picHeader->getScalingListAPSId(), SCALING_LIST_APS ); + if( false == parameterSetManager.activateAPS( picHeader->getScalingListAPSId(), SCALING_LIST_APS ) ) + { + THROW( "SCALING LIST APS activation failed!" ); + } + + CHECK( scalingListAPS->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" ); + //ToDO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that referrs it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that referrs it. } - const PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); // this is a temporary PPS object. Do not store this value + } + picHeader->setScalingListAPS(scalingListAPS); +} + +void DecLib::xActivateParameterSets( const int layerId ) +{ + if (m_bFirstSliceInPicture) + { + APS** apss = m_parameterSetManager.getAPSs(); + memset(apss, 0, sizeof(*apss) * ALF_CTB_MAX_NUM_APS); + const PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId()); // this is a temporary PPS object. Do not store this value CHECK(pps == 0, "No PPS present"); const SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId()); // this is a temporary SPS object. Do not store this value CHECK(sps == 0, "No SPS present"); + const VPS *vps = sps->getVPSId() ? 
m_parameterSetManager.getVPS( sps->getVPSId() ) : nullptr; + if (NULL == pps->pcv) { - m_parameterSetManager.getPPS( m_apcSlicePilot->getPPSId() )->pcv = new PreCalcValues( *sps, *pps, false ); + m_parameterSetManager.getPPS( m_picHeader.getPPSId() )->pcv = new PreCalcValues( *sps, *pps, false ); } m_parameterSetManager.clearSPSChangedFlag(sps->getSPSId()); m_parameterSetManager.clearPPSChangedFlag(pps->getPPSId()); - if (false == m_parameterSetManager.activatePPS(m_apcSlicePilot->getPPSId(),m_apcSlicePilot->isIRAP())) + if (false == m_parameterSetManager.activatePPS(m_picHeader.getPPSId(),m_apcSlicePilot->isIRAP())) { THROW("Parameter set activation failed!"); } - - if (aps) + m_parameterSetManager.getApsMap()->clear(); + for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { - m_parameterSetManager.clearAPSChangedFlag(aps->getAPSId()); - if (false == m_parameterSetManager.activateAPS(m_apcSlicePilot->getAPSId())) + APS* aps = m_parameterSetManager.getAPS(i, ALF_APS); + if (aps) { - THROW("APS activation failed!"); + m_parameterSetManager.clearAPSChangedFlag(i, ALF_APS); } } + APS* lmcsAPS = nullptr; + APS* scalinglistAPS = nullptr; + activateAPS(&m_picHeader, m_apcSlicePilot, m_parameterSetManager, apss, lmcsAPS, scalinglistAPS); xParsePrefixSEImessages(); @@ -767,14 +970,12 @@ void DecLib::xActivateParameterSets() #endif // Get a new picture buffer. This will also set up m_pcPic, and therefore give us a SPS and PPS pointer that we can use. 
- m_pcPic = xGetNewPicBuffer (*sps, *pps, m_apcSlicePilot->getTLayer()); - - m_apcSlicePilot->applyReferencePictureSet(m_cListPic, m_apcSlicePilot->getRPS()); - - m_pcPic->finalInit(*sps, *pps, *aps); + m_pcPic = xGetNewPicBuffer( *sps, *pps, m_apcSlicePilot->getTLayer(), layerId ); + m_apcSlicePilot->applyReferencePictureListBasedMarking( m_cListPic, m_apcSlicePilot->getRPL0(), m_apcSlicePilot->getRPL1(), layerId ); + m_pcPic->finalInit( vps, *sps, *pps, &m_picHeader, apss, lmcsAPS, scalinglistAPS ); m_pcPic->createTempBuffers( m_pcPic->cs->pps->pcv->maxCUWidth ); - m_pcPic->cs->createCoeffs(); + m_pcPic->cs->createCoeffs((bool)m_pcPic->cs->sps->getPLTMode()); m_pcPic->allocateNewSlice(); // make the slice-pilot a real slice, and set up the slice-pilot for the next slice @@ -785,7 +986,6 @@ void DecLib::xActivateParameterSets() Slice *pSlice = m_pcPic->slices[m_uiSliceSegmentIdx]; // Update the PPS and SPS pointers with the ones of the picture. - aps= pSlice->getAPS(); pps=pSlice->getPPS(); sps=pSlice->getSPS(); @@ -793,18 +993,20 @@ void DecLib::xActivateParameterSets() m_pcPic->cs->slice = pSlice; m_pcPic->cs->sps = sps; m_pcPic->cs->pps = pps; - m_pcPic->cs->aps = aps; -#if HEVC_VPS - m_pcPic->cs->vps = pSlice->getVPS(); -#endif + m_pcPic->cs->vps = vps; + + memcpy(m_pcPic->cs->alfApss, apss, sizeof(m_pcPic->cs->alfApss)); + m_pcPic->cs->lmcsAps = lmcsAPS; + m_pcPic->cs->scalinglistAps = scalinglistAPS; + m_pcPic->cs->pcv = pps->pcv; // Initialise the various objects for the new set of settings - m_cSAO.create( sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), pps->getPpsRangeExtension().getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA), pps->getPpsRangeExtension().getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA) ); + m_cSAO.create( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), 
sps->getMaxCUHeight(), sps->getMaxCodingDepth(), pps->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_LUMA ), pps->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_CHROMA ) ); m_cLoopFilter.create( sps->getMaxCodingDepth() ); m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) ); - m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc() ); - if (sps->getUseReshaper()) + m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() ); + if (sps->getUseLmcs()) { m_cReshaper.createDec(sps->getBitDepth(CHANNEL_TYPE_LUMA)); } @@ -814,13 +1016,13 @@ void DecLib::xActivateParameterSets() if(!m_SEIs.empty()) { - // Check if any new Picture Timing SEI has arrived - SEIMessages pictureTimingSEIs = getSeisByType(m_SEIs, SEI::PICTURE_TIMING); - if (pictureTimingSEIs.size()>0) + // Check if any new Frame Field Info SEI has arrived + SEIMessages frameFieldSEIs = getSeisByType(m_SEIs, SEI::FRAME_FIELD_INFO); + if (frameFieldSEIs.size()>0) { - SEIPictureTiming* pictureTiming = (SEIPictureTiming*) *(pictureTimingSEIs.begin()); - isField = (pictureTiming->m_picStruct == 1) || (pictureTiming->m_picStruct == 2) || (pictureTiming->m_picStruct == 9) || (pictureTiming->m_picStruct == 10) || (pictureTiming->m_picStruct == 11) || (pictureTiming->m_picStruct == 12); - isTopField = (pictureTiming->m_picStruct == 1) || (pictureTiming->m_picStruct == 9) || (pictureTiming->m_picStruct == 11); + SEIFrameFieldInfo* ff = (SEIFrameFieldInfo*) *(frameFieldSEIs.begin()); + isField = ff->m_fieldPicFlag; + isTopField = isField && (!ff->m_bottomFieldFlag); } } @@ -834,15 +1036,11 @@ void DecLib::xActivateParameterSets() // Recursive structure m_cCuDecoder.init( &m_cTrQuant, &m_cIntraPred, &m_cInterPred ); - if (sps->getUseReshaper()) + if (sps->getUseLmcs()) { m_cCuDecoder.initDecCuReshaper(&m_cReshaper, sps->getChromaFormatIdc()); } -#if MAX_TB_SIZE_SIGNALLING - m_cTrQuant.init( nullptr, sps->getMaxTbSize(), false, false, false, 
false, false ); -#else - m_cTrQuant.init( nullptr, MAX_TB_SIZEY, false, false, false, false, false ); -#endif + m_cTrQuant.init(m_cTrQuantScalingList.getQuant(), sps->getMaxTbSize(), false, false, false, false); // RdCost m_cRdCost.setCostMode ( COST_STANDARD_LOSSY ); // not used in decoder side RdCost stuff -> set to default @@ -851,7 +1049,7 @@ void DecLib::xActivateParameterSets() if( sps->getALFEnabledFlag() ) { - m_cALF.create( sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), sps->getBitDepths().recon ); + m_cALF.create( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), sps->getBitDepths().recon ); } } else @@ -865,15 +1063,18 @@ void DecLib::xActivateParameterSets() const SPS *sps = pSlice->getSPS(); const PPS *pps = pSlice->getPPS(); - APS *aps = pSlice->getAPS(); + APS** apss = pSlice->getAlfAPSs(); + APS *lmcsAPS = m_picHeader.getLmcsAPS(); + APS *scalinglistAPS = m_picHeader.getScalingListAPS(); + // fix Parameter Sets, now that we have the real slice m_pcPic->cs->slice = pSlice; m_pcPic->cs->sps = sps; m_pcPic->cs->pps = pps; - m_pcPic->cs->aps = aps; -#if HEVC_VPS - m_pcPic->cs->vps = pSlice->getVPS(); -#endif + memcpy(m_pcPic->cs->alfApss, apss, sizeof(m_pcPic->cs->alfApss)); + m_pcPic->cs->lmcsAps = lmcsAPS; + m_pcPic->cs->scalinglistAps = scalinglistAPS; + m_pcPic->cs->pcv = pps->pcv; // check that the current active PPS has not changed... 
@@ -885,11 +1086,29 @@ void DecLib::xActivateParameterSets() { EXIT("Error - a new PPS has been decoded while processing a picture"); } - if (aps && m_parameterSetManager.getAPSChangedFlag(aps->getAPSId())) + for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) + { + APS* aps = m_parameterSetManager.getAPS(i, ALF_APS); + if (aps && m_parameterSetManager.getAPSChangedFlag(i, ALF_APS)) + { + EXIT("Error - a new APS has been decoded while processing a picture"); + } + } + + if (lmcsAPS && m_parameterSetManager.getAPSChangedFlag(lmcsAPS->getAPSId(), LMCS_APS) ) + { + EXIT("Error - a new LMCS APS has been decoded while processing a picture"); + } + if( scalinglistAPS && m_parameterSetManager.getAPSChangedFlag( scalinglistAPS->getAPSId(), SCALING_LIST_APS ) ) { - EXIT("Error - a new APS has been decoded while processing a picture"); + EXIT( "Error - a new SCALING LIST APS has been decoded while processing a picture" ); } + activateAPS(&m_picHeader, pSlice, m_parameterSetManager, apss, lmcsAPS, scalinglistAPS); + + m_pcPic->cs->lmcsAps = lmcsAPS; + m_pcPic->cs->scalinglistAps = scalinglistAPS; + xParsePrefixSEImessages(); // Check if any new SEI has arrived @@ -902,6 +1121,36 @@ void DecLib::xActivateParameterSets() deleteSEIs(m_SEIs); } } + + // Conformance checks + Slice *pSlice = m_pcPic->slices[m_uiSliceSegmentIdx]; + const SPS *sps = pSlice->getSPS(); + const PPS *pps = pSlice->getPPS(); + + if( !sps->getUseWP() ) + { + CHECK( pps->getUseWP(), "When sps_weighted_pred_flag is equal to 0, the value of pps_weighted_pred_flag shall be equal to 0." ); + } + + if( !sps->getUseWPBiPred() ) + { + CHECK( pps->getWPBiPred(), "When sps_weighted_bipred_flag is equal to 0, the value of pps_weighted_bipred_flag shall be equal to 0." 
); + } + + CHECK( ( pps->getPicWidthInLumaSamples() % ( std::max( 8, int( sps->getMaxCUWidth() >> ( sps->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)" ); + CHECK( ( pps->getPicHeightInLumaSamples() % ( std::max( 8, int( sps->getMaxCUHeight() >> ( sps->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)" ); + if( !sps->getRprEnabledFlag() ) // subpics_present_flag is equal to 1 condition shall be added + { + CHECK( pps->getPicWidthInLumaSamples() != sps->getMaxPicWidthInLumaSamples(), "When subpics_present_flag is equal to 1 or ref_pic_resampling_enabled_flag equal to 0, the value of pic_width_in_luma_samples shall be equal to pic_width_max_in_luma_samples." ); + CHECK( pps->getPicHeightInLumaSamples() != sps->getMaxPicHeightInLumaSamples(), "When subpics_present_flag is equal to 1 or ref_pic_resampling_enabled_flag equal to 0, the value of pic_height_in_luma_samples shall be equal to pic_height_max_in_luma_samples." ); + } + + CHECK( !sps->getRprEnabledFlag() && pps->getScalingWindow().getWindowEnabledFlag(), "When ref_pic_resampling_enabled_flag is equal to 0, the value of scaling_window_flag shall be equal to 0." 
); + + if( sps->getCTUSize() + 2 * ( 1 << sps->getLog2MinCodingBlockSize() ) > pps->getPicWidthInLumaSamples() ) + { + CHECK( sps->getWrapAroundEnabledFlag(), "Wraparound shall be disabled when the value of ( CtbSizeY / MinCbSizeY + 1) is less than or equal to ( pic_width_in_luma_samples / MinCbSizeY - 1 )" ); + } } @@ -922,62 +1171,72 @@ void DecLib::xParsePrefixSEImessages() while (!m_prefixSEINALUs.empty()) { InputNALUnit &nalu=*m_prefixSEINALUs.front(); - m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_SEIs, nalu.m_nalUnitType, m_parameterSetManager.getActiveSPS(), m_pDecodedSEIOutputStream ); + m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_SEIs, nalu.m_nalUnitType, nalu.m_temporalId, m_parameterSetManager.getActiveSPS(), m_HRD, m_pDecodedSEIOutputStream ); delete m_prefixSEINALUs.front(); m_prefixSEINALUs.pop_front(); } } +void DecLib::xDecodePicHeader( InputNALUnit& nalu ) +{ + m_HLSReader.setBitstream( &nalu.getBitstream() ); + m_HLSReader.parsePictureHeader( &m_picHeader, &m_parameterSetManager); + m_picHeader.setValid(); +} bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay ) { + if(m_picHeader.isValid() == false) { + return false; + } + m_apcSlicePilot->setPicHeader( &m_picHeader ); m_apcSlicePilot->initSlice(); // the slice pilot is an object to prepare for a new slice // it is not associated with picture, sps or pps structures. 
+ Picture* scaledRefPic[MAX_NUM_REF] = {}; + if (m_bFirstSliceInPicture) { m_uiSliceSegmentIdx = 0; } else { + CHECK(nalu.m_nalUnitType != m_pcPic->slices[m_uiSliceSegmentIdx - 1]->getNalUnitType(), "The value of NAL unit type shall be the same for all coded slice NAL units of a picture"); m_apcSlicePilot->copySliceInfo( m_pcPic->slices[m_uiSliceSegmentIdx-1] ); } -#if HEVC_DEPENDENT_SLICES - m_apcSlicePilot->setSliceSegmentIdx(m_uiSliceSegmentIdx); -#endif m_apcSlicePilot->setNalUnitType(nalu.m_nalUnitType); -#if !JVET_M0101_HLS - bool nonReferenceFlag = (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_TRAIL_N || - m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_N || - m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA_N || - m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N || - m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N); - m_apcSlicePilot->setTemporalLayerNonReferenceFlag(nonReferenceFlag); -#endif m_apcSlicePilot->setTLayer(nalu.m_temporalId); + for( auto& naluTemporalId : m_accessUnitNals ) + { + if( naluTemporalId.first != NAL_UNIT_DPS + && naluTemporalId.first != NAL_UNIT_VPS + && naluTemporalId.first != NAL_UNIT_SPS + && naluTemporalId.first != NAL_UNIT_EOS + && naluTemporalId.first != NAL_UNIT_EOB ) + { + CHECK( naluTemporalId.second < nalu.m_temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" ); + } + } + + if (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR) + CHECK(nalu.m_temporalId != 0, "Current GDR picture has TemporalId not equal to 0"); + m_HLSReader.setBitstream( &nalu.getBitstream() ); - m_HLSReader.parseSliceHeader( m_apcSlicePilot, &m_parameterSetManager, m_prevTid0POC ); + m_HLSReader.parseSliceHeader( m_apcSlicePilot, &m_picHeader, &m_parameterSetManager, m_prevTid0POC ); // update independent slice index uint32_t uiIndependentSliceIdx = 0; if (!m_bFirstSliceInPicture) { uiIndependentSliceIdx = 
m_pcPic->slices[m_uiSliceSegmentIdx-1]->getIndependentSliceIdx(); -#if HEVC_DEPENDENT_SLICES - if (!m_apcSlicePilot->getDependentSliceSegmentFlag()) - { -#endif uiIndependentSliceIdx++; -#if HEVC_DEPENDENT_SLICES - } -#endif } m_apcSlicePilot->setIndependentSliceIdx(uiIndependentSliceIdx); #if K0149_BLOCK_STATISTICS - PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); + PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId()); CHECK(pps == 0, "No PPS present"); SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId()); CHECK(sps == 0, "No SPS present"); @@ -987,13 +1246,14 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", m_apcSlicePilot->getPOC() ) ); -#if HEVC_DEPENDENT_SLICES - // set POC for dependent slices in skipped pictures - if(m_apcSlicePilot->getDependentSliceSegmentFlag() && m_prevSliceSkipped) - { - m_apcSlicePilot->setPOC(m_skippedPOC); - } -#endif + if ((m_bFirstSliceInPicture || + m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || + m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && + getNoOutputPriorPicsFlag()) + { + checkNoOutputPriorPics(&m_cListPic); + setNoOutputPriorPicsFlag (false); + } xUpdatePreviousTid0POC(m_apcSlicePilot); @@ -1001,38 +1261,35 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl m_apcSlicePilot->setAssociatedIRAPType(m_associatedIRAPType); //For inference of NoOutputOfPriorPicsFlag - if (m_apcSlicePilot->getRapPicFlag()) + if (m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) { -#if !JVET_M0101_HLS - if ((m_apcSlicePilot->getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && m_apcSlicePilot->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP) || - (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_bFirstSliceInSequence) || - (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && 
m_apcSlicePilot->getHandleCraAsCvsStartFlag())) -#else if ((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_bFirstSliceInSequence) || - (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_apcSlicePilot->getHandleCraAsCvsStartFlag())) -#endif + (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_apcSlicePilot->getHandleCraAsCvsStartFlag()) || + (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR && m_bFirstSliceInSequence)) { - m_apcSlicePilot->setNoRaslOutputFlag(true); + m_apcSlicePilot->setNoIncorrectPicOutputFlag(true); } //the inference for NoOutputPriorPicsFlag - if (!m_bFirstSliceInBitstream && m_apcSlicePilot->getRapPicFlag() && m_apcSlicePilot->getNoRaslOutputFlag()) + if (!m_bFirstSliceInBitstream && + (m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && + m_apcSlicePilot->getNoIncorrectPicOutputFlag()) { - if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) + if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) { - m_apcSlicePilot->setNoOutputPriorPicsFlag(true); + m_picHeader.setNoOutputOfPriorPicsFlag(true); } } else { - m_apcSlicePilot->setNoOutputPriorPicsFlag(false); + m_picHeader.setNoOutputOfPriorPicsFlag(false); } - if(m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) + if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) { - m_craNoRaslOutputFlag = m_apcSlicePilot->getNoRaslOutputFlag(); + m_lastNoIncorrectPicOutputFlag = m_apcSlicePilot->getNoIncorrectPicOutputFlag(); } } - if (m_apcSlicePilot->getRapPicFlag() && m_apcSlicePilot->getNoOutputPriorPicsFlag()) + if ((m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && m_picHeader.getNoOutputOfPriorPicsFlag()) { m_lastPOCNoOutputPriorPics = 
m_apcSlicePilot->getPOC(); m_isNoOutputPriorPics = true; @@ -1043,32 +1300,24 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl } //For inference of PicOutputFlag -#if !JVET_M0101_HLS - if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R) -#else if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL) -#endif { - if ( m_craNoRaslOutputFlag ) + if (m_lastNoIncorrectPicOutputFlag) { - m_apcSlicePilot->setPicOutputFlag(false); + m_picHeader.setPicOutputFlag(false); } } - if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_craNoRaslOutputFlag) //Reset POC MSB when CRA has NoRaslOutputFlag equal to 1 + if ((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && + m_lastNoIncorrectPicOutputFlag) //Reset POC MSB when CRA or GDR has NoIncorrectPicOutputFlag equal to 1 { - PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); + PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId()); CHECK(pps == 0, "No PPS present"); SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId()); CHECK(sps == 0, "No SPS present"); int iMaxPOClsb = 1 << sps->getBitsForPOC(); m_apcSlicePilot->setPOC( m_apcSlicePilot->getPOC() & (iMaxPOClsb - 1) ); xUpdatePreviousTid0POC(m_apcSlicePilot); - if (m_apcSlicePilot->getAPSId() != -1) - { - APS *aps = m_parameterSetManager.getAPS(m_apcSlicePilot->getAPSId()); - CHECK(aps == 0, "No APS present"); - } } // Skip pictures due to random access @@ -1080,34 +1329,19 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl return false; } // Skip TFD pictures associated with BLA/BLANT pictures -#if !JVET_M0101_HLS - if (isSkipPictureForBLA(iPOCLastDisplay)) - { - m_prevSliceSkipped = true; - m_skippedPOC = m_apcSlicePilot->getPOC(); - return false; - } -#endif // clear previous slice skipped 
flag m_prevSliceSkipped = false; //we should only get a different poc for a new picture (with CTU address==0) -#if HEVC_DEPENDENT_SLICES - if (!m_apcSlicePilot->getDependentSliceSegmentFlag() && m_apcSlicePilot->getPOC()!=m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() != 0)) -#else - if(m_apcSlicePilot->getPOC() != m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() != 0)) -#endif + if(m_apcSlicePilot->getPOC() != m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getFirstCtuRsAddrInSlice() != 0)) { msg( WARNING, "Warning, the first slice of a picture might have been lost!\n"); } + m_prevLayerID = nalu.m_nuhLayerId; // leave when a new picture is found -#if HEVC_DEPENDENT_SLICES - if (!m_apcSlicePilot->getDependentSliceSegmentFlag() && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() == 0 && !m_bFirstSliceInPicture) ) -#else - if(m_apcSlicePilot->getSliceCurStartCtuTsAddr() == 0 && !m_bFirstSliceInPicture) -#endif + if(m_apcSlicePilot->getFirstCtuRsAddrInSlice() == 0 && !m_bFirstSliceInPicture) { if (m_prevPOC >= m_pocRandomAccess) { @@ -1125,20 +1359,38 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl //detect lost reference picture and insert copy of earlier frame. 
{ int lostPoc; - while((lostPoc=m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPS(), true, m_pocRandomAccess)) > 0) + int refPicIndex; + while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL0(), 0, true, &refPicIndex)) > 0) { - xCreateLostPicture(lostPoc-1); + if ( ( (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) || (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) ) && m_apcSlicePilot->getNoIncorrectPicOutputFlag() ) + { + if (m_apcSlicePilot->getRPL0()->isInterLayerRefPic(refPicIndex) == 0) + { + xCreateUnavailablePicture(lostPoc - 1, m_apcSlicePilot->getRPL0()->isRefPicLongterm(refPicIndex), m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL0()->isInterLayerRefPic(refPicIndex)); + } + } + else + { + xCreateLostPicture( lostPoc - 1, m_apcSlicePilot->getPic()->layerId ); + } + } + while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL1(), 0, true, &refPicIndex)) > 0) + { + if (((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) || (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)) && m_apcSlicePilot->getNoIncorrectPicOutputFlag()) + { + if (m_apcSlicePilot->getRPL1()->isInterLayerRefPic(refPicIndex) == 0) + { + xCreateUnavailablePicture(lostPoc - 1, m_apcSlicePilot->getRPL1()->isRefPicLongterm(refPicIndex), m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL1()->isInterLayerRefPic(refPicIndex)); + } + } + else + { + xCreateLostPicture( lostPoc - 1, m_apcSlicePilot->getPic()->layerId ); + } } } -#if HEVC_DEPENDENT_SLICES - if (!m_apcSlicePilot->getDependentSliceSegmentFlag()) - { -#endif m_prevPOC = m_apcSlicePilot->getPOC(); -#if HEVC_DEPENDENT_SLICES - } -#endif if (m_bFirstSliceInPicture) { @@ -1146,7 +1398,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl } // actual decoding starts here - xActivateParameterSets(); + 
xActivateParameterSets( nalu.m_nuhLayerId ); m_bFirstSliceInSequence = false; m_bFirstSliceInBitstream = false; @@ -1158,36 +1410,15 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl m_pcPic->layer = pcSlice->getTLayer(); m_pcPic->referenced = true; m_pcPic->layer = nalu.m_temporalId; + m_pcPic->layerId = nalu.m_nuhLayerId; + m_pcPic->subLayerNonReferencePictureDueToSTSA = false; - // When decoding the slice header, the stored start and end addresses were actually RS addresses, not TS addresses. - // Now, having set up the maps, convert them to the correct form. -#if HEVC_TILES_WPP - const TileMap& tileMap = *(m_pcPic->tileMap); -#endif -#if HEVC_DEPENDENT_SLICES -#if HEVC_TILES_WPP - pcSlice->setSliceSegmentCurStartCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceSegmentCurStartCtuTsAddr()) ); - pcSlice->setSliceSegmentCurEndCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceSegmentCurEndCtuTsAddr()) ); -#endif - if(!pcSlice->getDependentSliceSegmentFlag()) - { -#endif -#if HEVC_TILES_WPP - pcSlice->setSliceCurStartCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceCurStartCtuTsAddr()) ); - pcSlice->setSliceCurEndCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceCurEndCtuTsAddr()) ); -#endif -#if HEVC_DEPENDENT_SLICES - } -#endif + pcSlice->checkCRA(pcSlice->getRPL0(), pcSlice->getRPL1(), m_pocCRA, m_associatedIRAPType, m_cListPic); + pcSlice->constructRefPicList(m_cListPic); + pcSlice->checkSTSA(m_cListPic); -#if HEVC_DEPENDENT_SLICES - if (!pcSlice->getDependentSliceSegmentFlag()) - { -#endif - pcSlice->checkCRA(pcSlice->getRPS(), m_pocCRA, m_associatedIRAPType, m_cListPic ); - // Set reference list - pcSlice->setRefPicList( m_cListPic, true, true ); + pcSlice->scaleRefPicList( scaledRefPic, m_pcPic->cs->picHeader, m_parameterSetManager.getAPSs(), m_picHeader.getLmcsAPS(), m_picHeader.getScalingListAPS(), true ); if (!pcSlice->isIntra()) { @@ -1216,7 +1447,9 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, 
int &iSkipFrame, int iPOCLastDispl pcSlice->setCheckLDC(bLowDelay); } - if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false ) + if (pcSlice->getSPS()->getUseSMVD() && pcSlice->getCheckLDC() == false + && pcSlice->getPicHeader()->getMvdL1ZeroFlag() == false + ) { int currPOC = pcSlice->getPOC(); @@ -1230,7 +1463,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); - if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm; + if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) && !isRefLongTerm ) { forwardPOC = poc; refIdx0 = ref; @@ -1241,7 +1475,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); - if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm; + if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) && !isRefLongTerm ) { backwardPOC = poc; refIdx1 = ref; @@ -1259,7 +1494,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); - if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm; + if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) && !isRefLongTerm ) { backwardPOC = poc; refIdx0 = ref; @@ -1270,7 +1506,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) { int poc = 
pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); - if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm; + if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) && !isRefLongTerm ) { forwardPOC = poc; refIdx1 = ref; @@ -1295,42 +1532,78 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl //--------------- pcSlice->setRefPOCList(); -#if HEVC_DEPENDENT_SLICES - } -#endif + SEIMessages drapSEIs = getSeisByType(m_pcPic->SEIs, SEI::DEPENDENT_RAP_INDICATION ); + if (!drapSEIs.empty()) + { + msg( NOTICE, "Dependent RAP indication SEI decoded\n"); + pcSlice->setDRAP(true); + pcSlice->setLatestDRAPPOC(pcSlice->getPOC()); + } + pcSlice->checkConformanceForDRAP(nalu.m_temporalId); -#if HEVC_USE_SCALING_LISTS Quant *quant = m_cTrQuant.getQuant(); - if(pcSlice->getSPS()->getScalingListFlag()) + if( pcSlice->getSPS()->getScalingListFlag() ) { ScalingList scalingList; - if(pcSlice->getPPS()->getScalingListPresentFlag()) - { - scalingList = pcSlice->getPPS()->getScalingList(); - } - else if (pcSlice->getSPS()->getScalingListPresentFlag()) + if( pcSlice->getPicHeader()->getScalingListPresentFlag() ) { - scalingList = pcSlice->getSPS()->getScalingList(); + APS* scalingListAPS = pcSlice->getPicHeader()->getScalingListAPS(); + scalingList = scalingListAPS->getScalingList(); } else { scalingList.setDefaultScalingList(); } - quant->setScalingListDec(scalingList); - quant->setUseScalingList(true); + int scalingListAPSId = pcSlice->getPicHeader()->getScalingListAPSId(); + if (getScalingListUpdateFlag() || (scalingListAPSId != getPreScalingListAPSId())) + { + quant->setScalingListDec(scalingList); + setScalingListUpdateFlag(false); + setPreScalingListAPSId(scalingListAPSId); + } + quant->setUseScalingList( true ); } else { - quant->setUseScalingList(false); + quant->setUseScalingList( false ); } -#endif - if (pcSlice->getSPS()->getUseReshaper()) + if 
(pcSlice->getSPS()->getUseLmcs()) { - m_cReshaper.copySliceReshaperInfo(m_cReshaper.getSliceReshaperInfo(), pcSlice->getReshapeInfo()); - if (pcSlice->getReshapeInfo().getSliceReshapeModelPresentFlag()) + if (m_bFirstSliceInPicture) + m_sliceLmcsApsId = -1; + if (pcSlice->getPicHeader()->getLmcsEnabledFlag()) + { + APS* lmcsAPS = pcSlice->getPicHeader()->getLmcsAPS(); + if (m_sliceLmcsApsId == -1) + { + m_sliceLmcsApsId = lmcsAPS->getAPSId(); + } + else + { + CHECK(lmcsAPS->getAPSId() != m_sliceLmcsApsId, "same APS ID shall be used for all slices in one picture"); + } + SliceReshapeInfo& sInfo = lmcsAPS->getReshaperAPSInfo(); + SliceReshapeInfo& tInfo = m_cReshaper.getSliceReshaperInfo(); + tInfo.reshaperModelMaxBinIdx = sInfo.reshaperModelMaxBinIdx; + tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx; + memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS)); + tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW; + tInfo.chrResScalingOffset = sInfo.chrResScalingOffset; + tInfo.setUseSliceReshaper(pcSlice->getPicHeader()->getLmcsEnabledFlag()); + tInfo.setSliceReshapeChromaAdj(pcSlice->getPicHeader()->getLmcsChromaResidualScaleFlag()); + tInfo.setSliceReshapeModelPresentFlag(true); + } + else + { + SliceReshapeInfo& tInfo = m_cReshaper.getSliceReshaperInfo(); + tInfo.setUseSliceReshaper(false); + tInfo.setSliceReshapeChromaAdj(false); + tInfo.setSliceReshapeModelPresentFlag(false); + } + if (pcSlice->getPicHeader()->getLmcsEnabledFlag()) { m_cReshaper.constructReshaper(); } @@ -1356,6 +1629,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl m_cReshaper.setRecReshaped(false); } } + m_cReshaper.setVPDULoc(-1, -1); } else { @@ -1369,34 +1643,52 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl m_bFirstSliceInPicture = false; m_uiSliceSegmentIdx++; + pcSlice->freeScaledRefPicList( scaledRefPic ); + return false; } -#if HEVC_VPS void 
DecLib::xDecodeVPS( InputNALUnit& nalu ) { - VPS* vps = new VPS(); + m_vps = new VPS(); m_HLSReader.setBitstream( &nalu.getBitstream() ); - m_HLSReader.parseVPS( vps ); - m_parameterSetManager.storeVPS( vps, nalu.getBitstream().getFifo() ); + + CHECK( nalu.m_temporalId, "The value of TemporalId of VPS NAL units shall be equal to 0" ); + + m_HLSReader.parseVPS( m_vps ); + m_parameterSetManager.storeVPS( m_vps, nalu.getBitstream().getFifo()); +} + +void DecLib::xDecodeDPS( InputNALUnit& nalu ) +{ + DPS* dps = new DPS(); + m_HLSReader.setBitstream( &nalu.getBitstream() ); + + CHECK( nalu.m_temporalId, "The value of TemporalId of DPS NAL units shall be equal to 0" ); + + m_HLSReader.parseDPS( dps ); + m_parameterSetManager.storeDPS( dps, nalu.getBitstream().getFifo() ); } -#endif void DecLib::xDecodeSPS( InputNALUnit& nalu ) { SPS* sps = new SPS(); m_HLSReader.setBitstream( &nalu.getBitstream() ); - m_HLSReader.parseSPS( sps ); - m_parameterSetManager.storeSPS( sps, nalu.getBitstream().getFifo() ); + CHECK( nalu.m_temporalId, "The value of TemporalId of SPS NAL units shall be equal to 0" ); + + m_HLSReader.parseSPS( sps ); DTRACE( g_trace_ctx, D_QP_PER_CTU, "CTU Size: %dx%d", sps->getMaxCUWidth(), sps->getMaxCUHeight() ); + m_parameterSetManager.storeSPS( sps, nalu.getBitstream().getFifo() ); } void DecLib::xDecodePPS( InputNALUnit& nalu ) { PPS* pps = new PPS(); m_HLSReader.setBitstream( &nalu.getBitstream() ); - m_HLSReader.parsePPS( pps ); + m_HLSReader.parsePPS( pps, &m_parameterSetManager ); + pps->setLayerId( nalu.m_nuhLayerId ); + pps->setTemporalId( nalu.m_temporalId ); m_parameterSetManager.storePPS( pps, nalu.getBitstream().getFifo() ); } @@ -1405,25 +1697,34 @@ void DecLib::xDecodeAPS(InputNALUnit& nalu) APS* aps = new APS(); m_HLSReader.setBitstream(&nalu.getBitstream()); m_HLSReader.parseAPS(aps); + aps->setTemporalId(nalu.m_temporalId); + aps->setLayerId( nalu.m_nuhLayerId ); + m_parameterSetManager.checkAuApsContent( aps, m_accessUnitApsNals ); + if 
(aps->getAPSType() == SCALING_LIST_APS) + { + setScalingListUpdateFlag(true); + } + + // aps will be deleted if it was already stored (and did not changed), + // thus, storing it must be last action. m_parameterSetManager.storeAPS(aps, nalu.getBitstream().getFifo()); } bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) { bool ret; // ignore all NAL units of layers > 0 - if (nalu.m_nuhLayerId > 0) - { - msg( WARNING, "Warning: found NAL unit with nuh_layer_id equal to %d. Ignoring.\n", nalu.m_nuhLayerId); - return false; - } + + m_accessUnitNals.push_back( std::pair<NalUnitType, int>( nalu.m_nalUnitType, nalu.m_temporalId ) ); switch (nalu.m_nalUnitType) { -#if HEVC_VPS case NAL_UNIT_VPS: xDecodeVPS( nalu ); return false; -#endif + + case NAL_UNIT_DPS: + xDecodeDPS( nalu ); + return false; case NAL_UNIT_SPS: xDecodeSPS( nalu ); @@ -1432,7 +1733,13 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) case NAL_UNIT_PPS: xDecodePPS( nalu ); return false; - case NAL_UNIT_APS: + + case NAL_UNIT_PH: + xDecodePicHeader(nalu); + return !m_bFirstSliceInPicture; + + case NAL_UNIT_PREFIX_APS: + case NAL_UNIT_SUFFIX_APS: xDecodeAPS(nalu); return false; @@ -1444,7 +1751,7 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) case NAL_UNIT_SUFFIX_SEI: if (m_pcPic) { - m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_pcPic->SEIs, nalu.m_nalUnitType, m_parameterSetManager.getActiveSPS(), m_pDecodedSEIOutputStream ); + m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_pcPic->SEIs, nalu.m_nalUnitType, nalu.m_temporalId, m_parameterSetManager.getActiveSPS(), m_HRD, m_pDecodedSEIOutputStream ); } else { @@ -1452,47 +1759,22 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) } return false; -#if !JVET_M0101_HLS - case NAL_UNIT_CODED_SLICE_TRAIL_R: - case NAL_UNIT_CODED_SLICE_TRAIL_N: - case NAL_UNIT_CODED_SLICE_TSA_R: - case NAL_UNIT_CODED_SLICE_TSA_N: - case 
NAL_UNIT_CODED_SLICE_STSA_R: - case NAL_UNIT_CODED_SLICE_STSA_N: - case NAL_UNIT_CODED_SLICE_BLA_W_LP: - case NAL_UNIT_CODED_SLICE_BLA_W_RADL: - case NAL_UNIT_CODED_SLICE_BLA_N_LP: - case NAL_UNIT_CODED_SLICE_IDR_W_RADL: - case NAL_UNIT_CODED_SLICE_IDR_N_LP: - case NAL_UNIT_CODED_SLICE_CRA: - case NAL_UNIT_CODED_SLICE_RADL_N: - case NAL_UNIT_CODED_SLICE_RADL_R: - case NAL_UNIT_CODED_SLICE_RASL_N: - case NAL_UNIT_CODED_SLICE_RASL_R: -#else case NAL_UNIT_CODED_SLICE_TRAIL: case NAL_UNIT_CODED_SLICE_STSA: case NAL_UNIT_CODED_SLICE_IDR_W_RADL: case NAL_UNIT_CODED_SLICE_IDR_N_LP: case NAL_UNIT_CODED_SLICE_CRA: + case NAL_UNIT_CODED_SLICE_GDR: case NAL_UNIT_CODED_SLICE_RADL: case NAL_UNIT_CODED_SLICE_RASL: -#endif ret = xDecodeSlice(nalu, iSkipFrame, iPOCLastDisplay); -#if JVET_J0090_MEMORY_BANDWITH_MEASURE - if ( ret ) - { - m_cacheModel.reportFrame( ); - m_cacheModel.accumulateFrame( ); - m_cacheModel.clear( ); - } -#endif return ret; case NAL_UNIT_EOS: m_associatedIRAPType = NAL_UNIT_INVALID; m_pocCRA = 0; m_pocRandomAccess = MAX_INT; + m_prevLayerID = MAX_INT; m_prevPOC = MAX_INT; m_prevSliceSkipped = false; m_skippedPOC = 0; @@ -1503,99 +1785,28 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) AUDReader audReader; uint32_t picType; audReader.parseAccessUnitDelimiter(&(nalu.getBitstream()),picType); - msg( NOTICE, "Note: found NAL_UNIT_ACCESS_UNIT_DELIMITER\n"); - return false; + return !m_bFirstSliceInPicture; } case NAL_UNIT_EOB: return false; - case NAL_UNIT_FILLER_DATA: - { - FDReader fdReader; - uint32_t size; - fdReader.parseFillerData(&(nalu.getBitstream()),size); - msg( NOTICE, "Note: found NAL_UNIT_FILLER_DATA with %u bytes payload.\n", size); - return false; - } -#if !JVET_M0101_HLS - case NAL_UNIT_RESERVED_VCL_N10: - case NAL_UNIT_RESERVED_VCL_R11: - case NAL_UNIT_RESERVED_VCL_N12: - case NAL_UNIT_RESERVED_VCL_R13: - case NAL_UNIT_RESERVED_VCL_N14: - case NAL_UNIT_RESERVED_VCL_R15: - - case NAL_UNIT_RESERVED_IRAP_VCL22: - 
case NAL_UNIT_RESERVED_IRAP_VCL23: - - case NAL_UNIT_RESERVED_VCL24: - case NAL_UNIT_RESERVED_VCL25: - case NAL_UNIT_RESERVED_VCL26: - case NAL_UNIT_RESERVED_VCL27: - case NAL_UNIT_RESERVED_VCL28: - case NAL_UNIT_RESERVED_VCL29: - case NAL_UNIT_RESERVED_VCL30: - case NAL_UNIT_RESERVED_VCL31: -#if !HEVC_VPS - case NAL_UNIT_RESERVED_32: -#endif -#else - case NAL_UNIT_RESERVED_VCL_4: - case NAL_UNIT_RESERVED_VCL_5: - case NAL_UNIT_RESERVED_VCL_6: - case NAL_UNIT_RESERVED_VCL_7: - - case NAL_UNIT_RESERVED_IRAP_VCL11: - case NAL_UNIT_RESERVED_IRAP_VCL12: - case NAL_UNIT_RESERVED_IRAP_VCL13: - - case NAL_UNIT_RESERVED_VCL14: -#if !HEVC_VPS - case NAL_UNIT_RESERVED_VCL15: -#endif -#endif + case NAL_UNIT_RESERVED_IRAP_VCL_11: + case NAL_UNIT_RESERVED_IRAP_VCL_12: msg( NOTICE, "Note: found reserved VCL NAL unit.\n"); xParsePrefixSEIsForUnknownVCLNal(); return false; -#if !JVET_M0101_HLS - case NAL_UNIT_RESERVED_NVCL41: - case NAL_UNIT_RESERVED_NVCL42: - case NAL_UNIT_RESERVED_NVCL43: - case NAL_UNIT_RESERVED_NVCL44: - case NAL_UNIT_RESERVED_NVCL45: - case NAL_UNIT_RESERVED_NVCL46: - case NAL_UNIT_RESERVED_NVCL47: -#else - case NAL_UNIT_RESERVED_NVCL16: - case NAL_UNIT_RESERVED_NVCL26: - case NAL_UNIT_RESERVED_NVCL27: -#endif + case NAL_UNIT_RESERVED_VCL_4: + case NAL_UNIT_RESERVED_VCL_5: + case NAL_UNIT_RESERVED_VCL_6: + case NAL_UNIT_RESERVED_NVCL_26: + case NAL_UNIT_RESERVED_NVCL_27: msg( NOTICE, "Note: found reserved NAL unit.\n"); return false; -#if !JVET_M0101_HLS - case NAL_UNIT_UNSPECIFIED_48: - case NAL_UNIT_UNSPECIFIED_49: - case NAL_UNIT_UNSPECIFIED_50: - case NAL_UNIT_UNSPECIFIED_51: - case NAL_UNIT_UNSPECIFIED_52: - case NAL_UNIT_UNSPECIFIED_53: - case NAL_UNIT_UNSPECIFIED_54: - case NAL_UNIT_UNSPECIFIED_55: - case NAL_UNIT_UNSPECIFIED_56: - case NAL_UNIT_UNSPECIFIED_57: - case NAL_UNIT_UNSPECIFIED_58: - case NAL_UNIT_UNSPECIFIED_59: - case NAL_UNIT_UNSPECIFIED_60: - case NAL_UNIT_UNSPECIFIED_61: - case NAL_UNIT_UNSPECIFIED_62: - case NAL_UNIT_UNSPECIFIED_63: 
-#else case NAL_UNIT_UNSPECIFIED_28: case NAL_UNIT_UNSPECIFIED_29: case NAL_UNIT_UNSPECIFIED_30: case NAL_UNIT_UNSPECIFIED_31: -#endif msg( NOTICE, "Note: found unspecified NAL unit.\n"); return false; default: @@ -1606,21 +1817,6 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay) return false; } -#if !JVET_M0101_HLS -/** Function for checking if picture should be skipped because of association with a previous BLA picture - * This function skips all TFD pictures that follow a BLA picture in decoding order and precede it in output order. - */ -bool DecLib::isSkipPictureForBLA( int& iPOCLastDisplay ) -{ - if( ( m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_N_LP || m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_W_LP || m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_W_RADL ) && - m_apcSlicePilot->getPOC() < m_pocCRA && ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N ) ) - { - iPOCLastDisplay++; - return true; - } - return false; -} -#endif /** Function for checking if picture should be skipped because of random access. This function checks the skipping of pictures in the case of -s option random access. * All pictures prior to the random access point indicated by the counter iSkipFrame are skipped. @@ -1638,24 +1834,17 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay ) iSkipFrame--; // decrement the counter return true; } + else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) + { + m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable. + } else if (m_pocRandomAccess == MAX_INT) // start of random access point, m_pocRandomAccess has not been set yet. 
{ -#if !JVET_M0101_HLS - if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA - || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP - || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL ) -#else if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) -#endif { // set the POC random access since we need to skip the reordered pictures in the case of CRA/CRANT/BLA/BLANT. m_pocRandomAccess = m_apcSlicePilot->getPOC(); } - else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) - { - m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable. - } else { if(!m_warningMessageSkipPicture) @@ -1667,11 +1856,7 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay ) } } // skip the reordered pictures, if necessary -#if !JVET_M0101_HLS - else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N)) -#else else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL)) -#endif { iPOCLastDisplay++; return true; @@ -1680,7 +1865,49 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay ) return false; } - - +void DecLib::checkNalUnitConstraints( uint32_t naluType ) +{ + if (m_parameterSetManager.getActiveSPS() != NULL && m_parameterSetManager.getActiveSPS()->getProfileTierLevel() != NULL) + { + const ConstraintInfo *cInfo = m_parameterSetManager.getActiveSPS()->getProfileTierLevel()->getConstraintInfo(); + xCheckNalUnitConstraintFlags( cInfo, naluType ); + } + if (m_parameterSetManager.getActiveDPS() != NULL) + { + const DPS *dps = m_parameterSetManager.getActiveDPS(); + 
for (int i=0; i< dps->getNumPTLs(); i++) + { + ProfileTierLevel ptl = dps->getProfileTierLevel(i); + const ConstraintInfo *cInfo = ptl.getConstraintInfo(); + xCheckNalUnitConstraintFlags( cInfo, naluType ); + } + } +} +void DecLib::xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType ) +{ + if (cInfo != NULL) + { + CHECK(cInfo->getNoTrailConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_TRAIL, + "Non-conforming bitstream. no_trail_constraint_flag is equal to 1 but bitstream contains NAL unit of type TRAIL_NUT."); + CHECK(cInfo->getNoStsaConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_STSA, + "Non-conforming bitstream. no_stsa_constraint_flag is equal to 1 but bitstream contains NAL unit of type STSA_NUT."); + CHECK(cInfo->getNoRaslConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_RASL, + "Non-conforming bitstream. no_rasl_constraint_flag is equal to 1 but bitstream contains NAL unit of type RASL_NUT."); + CHECK(cInfo->getNoRadlConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_RADL, + "Non-conforming bitstream. no_radl_constraint_flag is equal to 1 but bitstream contains NAL unit of type RADL_NUT."); + CHECK(cInfo->getNoIdrConstraintFlag() && (naluType == NAL_UNIT_CODED_SLICE_IDR_W_RADL), + "Non-conforming bitstream. no_idr_constraint_flag is equal to 1 but bitstream contains NAL unit of type IDR_W_RADL."); + CHECK(cInfo->getNoIdrConstraintFlag() && (naluType == NAL_UNIT_CODED_SLICE_IDR_N_LP), + "Non-conforming bitstream. no_idr_constraint_flag is equal to 1 but bitstream contains NAL unit of type IDR_N_LP."); + CHECK(cInfo->getNoCraConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_CRA, + "Non-conforming bitstream. no_cra_constraint_flag is equal to 1 but bitstream contains NAL unit of type CRA_NUT."); + CHECK(cInfo->getNoGdrConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_GDR, + "Non-conforming bitstream. 
no_gdr_constraint_flag is equal to 1 but bitstream contains NAL unit of type GDR_NUT."); + CHECK(cInfo->getNoApsConstraintFlag() && naluType == NAL_UNIT_PREFIX_APS, + "Non-conforming bitstream. no_aps_constraint_flag is equal to 1 but bitstream contains NAL unit of type APS_PREFIX_NUT."); + CHECK(cInfo->getNoApsConstraintFlag() && naluType == NAL_UNIT_SUFFIX_APS, + "Non-conforming bitstream. no_aps_constraint_flag is equal to 1 but bitstream contains NAL unit of type APS_SUFFIX_NUT."); + } +} //! \} diff --git a/source/Lib/DecoderLib/DecLib.h b/source/Lib/DecoderLib/DecLib.h index 1ea3aacf47749403410a6fed3a3c3c4640871773..d9490513428e302c9b3d7d8a3593acff15a07594 100644 --- a/source/Lib/DecoderLib/DecLib.h +++ b/source/Lib/DecoderLib/DecLib.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -77,16 +77,19 @@ private: PicList m_cListPic; // Dynamic buffer ParameterSetManager m_parameterSetManager; // storage for parameter sets + PicHeader m_picHeader; // picture header Slice* m_apcSlicePilot; SEIMessages m_SEIs; ///< List of SEI messages that have been received before the first slice and between slices, excluding prefix SEIs... 
+ // functional classes IntraPrediction m_cIntraPred; InterPrediction m_cInterPred; TrQuant m_cTrQuant; DecSlice m_cSliceDecoder; + TrQuant m_cTrQuantScalingList; DecCu m_cCuDecoder; HLSyntaxReader m_HLSReader; CABACDecoder m_CABACDecoder; @@ -95,17 +98,16 @@ private: SampleAdaptiveOffset m_cSAO; AdaptiveLoopFilter m_cALF; Reshape m_cReshaper; ///< reshaper class + HRD m_HRD; // decoder side RD cost computation RdCost m_cRdCost; ///< RD cost computation class #if JVET_J0090_MEMORY_BANDWITH_MEASURE CacheModel m_cacheModel; -#endif -#if !JVET_M0101_HLS - bool isSkipPictureForBLA(int& iPOCLastDisplay); #endif bool isRandomAccessSkipPicture(int& iSkipFrame, int& iPOCLastDisplay); Picture* m_pcPic; uint32_t m_uiSliceSegmentIdx; + uint32_t m_prevLayerID; int m_prevPOC; int m_prevTid0POC; bool m_bFirstSliceInPicture; @@ -115,7 +117,8 @@ private: bool m_bFirstSliceInBitstream; int m_lastPOCNoOutputPriorPics; bool m_isNoOutputPriorPics; - bool m_craNoRaslOutputFlag; //value of variable NoRaslOutputFlag of the last CRA pic + bool m_lastNoIncorrectPicOutputFlag; //value of variable NoIncorrectPicOutputFlag of the last CRA / GDR pic + int m_sliceLmcsApsId; //value of LmcsApsId, constraint is same id for all slices in one picture std::ostream *m_pDecodedSEIOutputStream; int m_decodedPictureHashSEIEnabled; ///< Checksum(3)/CRC(2)/MD5(1)/disable(0) acting on decoded picture hash SEI message @@ -126,6 +129,14 @@ private: std::list<InputNALUnit*> m_prefixSEINALUs; /// Buffered up prefix SEI NAL Units. 
int m_debugPOC; int m_debugCTU; + + std::vector<std::pair<NalUnitType, int>> m_accessUnitNals; + std::vector<int> m_accessUnitApsNals; + + VPS* m_vps; + bool m_scalingListUpdateFlag; + int m_PreScalingListAPSId; + public: DecLib(); virtual ~DecLib(); @@ -147,10 +158,13 @@ public: void finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl = INFO); void finishPictureLight(int& poc, PicList*& rpcListPic ); void checkNoOutputPriorPics (PicList* rpcListPic); + void checkNalUnitConstraints( uint32_t naluType ); + bool getNoOutputPriorPicsFlag () const { return m_isNoOutputPriorPics; } void setNoOutputPriorPicsFlag (bool val) { m_isNoOutputPriorPics = val; } void setFirstSliceInPicture (bool val) { m_bFirstSliceInPicture = val; } + bool getFirstSliceInPicture () const { return m_bFirstSliceInPicture; } bool getFirstSliceInSequence () const { return m_bFirstSliceInSequence; } void setFirstSliceInSequence (bool val) { m_bFirstSliceInSequence = val; } void setDecodedSEIMessageOutputStream(std::ostream *pOpStream) { m_pDecodedSEIOutputStream = pOpStream; } @@ -160,28 +174,40 @@ public: void setDebugCTU( int debugCTU ) { m_debugCTU = debugCTU; } int getDebugPOC( ) const { return m_debugPOC; }; void setDebugPOC( int debugPOC ) { m_debugPOC = debugPOC; }; + void resetAccessUnitNals() { m_accessUnitNals.clear(); } + void resetAccessUnitApsNals() { m_accessUnitApsNals.clear(); } + bool isSliceNaluFirstInAU( bool newPicture, InputNALUnit &nalu ); + + const VPS* getVPS() { return m_vps; } + void initScalingList() + { + m_cTrQuantScalingList.init(nullptr, MAX_TB_SIZEY, false, false, false, false); + } + bool getScalingListUpdateFlag() { return m_scalingListUpdateFlag; } + void setScalingListUpdateFlag(bool b) { m_scalingListUpdateFlag = b; } + int getPreScalingListAPSId() { return m_PreScalingListAPSId; } + void setPreScalingListAPSId(int id) { m_PreScalingListAPSId = id; } + protected: void xUpdateRasInit(Slice* slice); - Picture * xGetNewPicBuffer(const SPS &sps, const PPS 
&pps, const uint32_t temporalLayer); - void xCreateLostPicture (int iLostPOC); - - void xActivateParameterSets(); + Picture * xGetNewPicBuffer( const SPS &sps, const PPS &pps, const uint32_t temporalLayer, const int layerId ); + void xCreateLostPicture( int iLostPOC, const int layerId ); + void xCreateUnavailablePicture(int iUnavailablePoc, bool longTermFlag, const int layerId, const bool interLayerRefPicFlag); + void xActivateParameterSets( const int layerId ); + void xDecodePicHeader( InputNALUnit& nalu ); bool xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay); -#if HEVC_VPS void xDecodeVPS( InputNALUnit& nalu ); -#endif + void xDecodeDPS( InputNALUnit& nalu ); void xDecodeSPS( InputNALUnit& nalu ); void xDecodePPS( InputNALUnit& nalu ); void xDecodeAPS(InputNALUnit& nalu); -#if !JVET_M0101_HLS - void xUpdatePreviousTid0POC( Slice *pSlice ) { if ((pSlice->getTLayer()==0) && (pSlice->isReferenceNalu() && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RASL_R)&& (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RADL_R))) { m_prevTid0POC=pSlice->getPOC(); } } -#else void xUpdatePreviousTid0POC(Slice *pSlice) { if ((pSlice->getTLayer() == 0) && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RASL) && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RADL)) { m_prevTid0POC = pSlice->getPOC(); } } -#endif void xParsePrefixSEImessages(); void xParsePrefixSEIsForUnknownVCLNal(); + void xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType ); + };// END CLASS DEFINITION DecLib diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp index 9e3b90fb681a25ecf7b4abe357ae7302b70d4372..8adadb3a22e641dc8db273f76428105e7b0f7380 100644 --- a/source/Lib/DecoderLib/DecSlice.cpp +++ b/source/Lib/DecoderLib/DecSlice.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -77,9 +77,6 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb const SPS* sps = slice->getSPS(); Picture* pic = slice->getPic(); -#if HEVC_TILES_WPP - const TileMap& tileMap = *pic->tileMap; -#endif CABACReader& cabacReader = *m_CABACDecoder->getCABACReader( 0 ); // setup coding structure @@ -87,18 +84,23 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb cs.slice = slice; cs.sps = sps; cs.pps = slice->getPPS(); - cs.aps = slice->getAPS(); -#if HEVC_VPS - cs.vps = slice->getVPS(); -#endif + memcpy(cs.alfApss, slice->getAlfAPSs(), sizeof(cs.alfApss)); + + cs.lmcsAps = slice->getPicHeader()->getLmcsAPS(); + cs.scalinglistAps = slice->getPicHeader()->getScalingListAPS(); + cs.pcv = slice->getPPS()->pcv; cs.chromaQpAdj = 0; cs.picture->resizeSAO(cs.pcv->sizeInCtus, 0); - if (slice->getSliceCurStartCtuTsAddr() == 0) + cs.resetPrevPLT(cs.prevPLT); + + if (slice->getFirstCtuRsAddrInSlice() == 0) { cs.picture->resizeAlfCtuEnableFlag( cs.pcv->sizeInCtus ); + cs.picture->resizeAlfCtbFilterIndex(cs.pcv->sizeInCtus); + cs.picture->resizeAlfCtuAlternative( cs.pcv->sizeInCtus ); } const unsigned numSubstreams = slice->getNumberOfSubstreamSizes() + 1; @@ -111,85 +113,33 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb ppcSubstreams[idx] = bitstream->extractSubstream( idx+1 < numSubstreams ? 
( slice->getSubstreamSize(idx) << 3 ) : bitstream->getNumBitsLeft() ); } -#if HEVC_DEPENDENT_SLICES - const int startCtuTsAddr = slice->getSliceSegmentCurStartCtuTsAddr(); -#else - const int startCtuTsAddr = slice->getSliceCurStartCtuTsAddr(); -#endif -#if HEVC_DEPENDENT_SLICES - const int startCtuRsAddr = startCtuTsAddr; -#elif HEVC_TILES_WPP - const int startCtuRsAddr = tileMap.getCtuTsToRsAddrMap(startCtuTsAddr); -#endif - const unsigned numCtusInFrame = cs.pcv->sizeInCtus; const unsigned widthInCtus = cs.pcv->widthInCtus; -#if HEVC_DEPENDENT_SLICES - const bool depSliceSegmentsEnabled = cs.pps->getDependentSliceSegmentsEnabledFlag(); -#endif -#if HEVC_TILES_WPP const bool wavefrontsEnabled = cs.pps->getEntropyCodingSyncEnabledFlag(); -#endif cabacReader.initBitstream( ppcSubstreams[0] ); cabacReader.initCtxModels( *slice ); // Quantization parameter -#if HEVC_DEPENDENT_SLICES - if(!slice->getDependentSliceSegmentFlag()) - { -#endif pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - } -#endif CHECK( pic->m_prevQP[0] == std::numeric_limits<int>::max(), "Invalid previous QP" ); DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", slice->getPOC() ); - // The first CTU of the slice is the first coded substream, but the global substream number, as calculated by getSubstreamForCtuAddr may be higher. - // This calculates the common offset for all substreams in this slice. -#if HEVC_DEPENDENT_SLICES - const unsigned subStreamOffset = tileMap.getSubstreamForCtuAddr( startCtuRsAddr, true, slice ); -#elif HEVC_TILES_WPP - const unsigned subStreamOffset = tileMap.getSubstreamForCtuAddr(startCtuRsAddr, true, slice); -#endif -#if HEVC_DEPENDENT_SLICES - if( depSliceSegmentsEnabled ) - { - // modify initial contexts with previous slice segment if this is a dependent slice. 
- const unsigned startTileIdx = tileMap.getTileIdxMap(startCtuRsAddr); - const Tile& currentTile = tileMap.tiles[startTileIdx]; - const unsigned firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - if( slice->getDependentSliceSegmentFlag() && startCtuRsAddr != firstCtuRsAddrOfTile ) - { - if( currentTile.getTileWidthInCtus() >= 2 || !wavefrontsEnabled ) - { - cabacReader.getCtx() = m_lastSliceSegmentEndContextState; - } - } - } -#endif // for every CTU in the slice segment... - bool isLastCtuOfSliceSegment = false; - for( unsigned ctuTsAddr = startCtuTsAddr; !isLastCtuOfSliceSegment && ctuTsAddr < numCtusInFrame; ctuTsAddr++ ) + unsigned subStrmId = 0; + for( unsigned ctuIdx = 0; ctuIdx < slice->getNumCtuInSlice(); ctuIdx++ ) { -#if HEVC_TILES_WPP - const unsigned ctuRsAddr = tileMap.getCtuTsToRsAddrMap(ctuTsAddr); - const Tile& currentTile = tileMap.tiles[ tileMap.getTileIdxMap(ctuRsAddr) ]; - const unsigned firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - const unsigned tileXPosInCtus = firstCtuRsAddrOfTile % widthInCtus; - const unsigned tileYPosInCtus = firstCtuRsAddrOfTile / widthInCtus; -#else - const unsigned ctuRsAddr = ctuTsAddr; -#endif - const unsigned ctuXPosInCtus = ctuRsAddr % widthInCtus; - const unsigned ctuYPosInCtus = ctuRsAddr / widthInCtus; -#if HEVC_TILES_WPP - const unsigned subStrmId = tileMap.getSubstreamForCtuAddr( ctuRsAddr, true, slice ) - subStreamOffset; -#else - const unsigned subStrmId = 0; -#endif + const unsigned ctuRsAddr = slice->getCtuAddrInSlice(ctuIdx); + const unsigned ctuXPosInCtus = ctuRsAddr % widthInCtus; + const unsigned ctuYPosInCtus = ctuRsAddr / widthInCtus; + const unsigned tileColIdx = slice->getPPS()->ctuToTileCol( ctuXPosInCtus ); + const unsigned tileRowIdx = slice->getPPS()->ctuToTileRow( ctuYPosInCtus ); + const unsigned tileXPosInCtus = slice->getPPS()->getTileColumnBd( tileColIdx ); + const unsigned tileYPosInCtus = slice->getPPS()->getTileRowBd( tileRowIdx ); + const unsigned tileColWidth = 
slice->getPPS()->getTileColumnWidth( tileColIdx ); + const unsigned tileRowHeight = slice->getPPS()->getTileRowHeight( tileRowIdx ); + const unsigned tileIdx = slice->getPPS()->getTileIdx( ctuXPosInCtus, ctuYPosInCtus); const unsigned maxCUSize = sps->getMaxCUWidth(); Position pos( ctuXPosInCtus*maxCUSize, ctuYPosInCtus*maxCUSize) ; UnitArea ctuArea(cs.area.chromaFormat, Area( pos.x, pos.y, maxCUSize, maxCUSize ) ); @@ -198,44 +148,43 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb cabacReader.initBitstream( ppcSubstreams[subStrmId] ); -#if HEVC_TILES_WPP // set up CABAC contexts' state for this CTU - if( ctuRsAddr == firstCtuRsAddrOfTile ) + if( ctuXPosInCtus == tileXPosInCtus && ctuYPosInCtus == tileYPosInCtus ) { - if( ctuTsAddr != startCtuTsAddr ) // if it is the first CTU, then the entropy coder has already been reset + if( ctuIdx != 0 ) // if it is the first CTU, then the entropy coder has already been reset { cabacReader.initCtxModels( *slice ); + cs.resetPrevPLT(cs.prevPLT); } pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp(); } else if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled ) { - // Synchronize cabac probabilities with upper-right CTU if it's available and at the start of a line. - if( ctuTsAddr != startCtuTsAddr ) // if it is the first CTU, then the entropy coder has already been reset + // Synchronize cabac probabilities with top CTU if it's available and at the start of a line. + if( ctuIdx != 0 ) // if it is the first CTU, then the entropy coder has already been reset { cabacReader.initCtxModels( *slice ); + cs.resetPrevPLT(cs.prevPLT); } - if( cs.getCURestricted( pos.offset(maxCUSize, -1), slice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) ) + if( cs.getCURestricted( pos.offset(0, -1), pos, slice->getIndependentSliceIdx(), tileIdx, CH_L ) ) { - // Top-right is available, so use it. + // Top is available, so use it. 
cabacReader.getCtx() = m_entropyCodingSyncContextState; } pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp(); } -#endif - bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr; - if(updateGbiCodingOrder) + bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0; + if(updateBcwCodingOrder) { - resetGbiCodingOrder(true, cs); + resetBcwCodingOrder(true, cs); } - if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == 0) + if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == tileXPosInCtus) { cs.motionLut.lut.resize(0); cs.motionLut.lutIbc.resize(0); - cs.motionLut.lutShare.resize(0); - cs.motionLut.lutShareIbc.resize(0); + cs.resetIBCBuffer = true; } if( !cs.slice->isIntra() ) @@ -245,39 +194,28 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb if( ctuRsAddr == debugCTU ) { - isLastCtuOfSliceSegment = true; // get out here break; } - isLastCtuOfSliceSegment = cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr ); + cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr ); m_pcCuDecoder->decompressCtu( cs, ctuArea ); -#if HEVC_TILES_WPP - if( ctuXPosInCtus == tileXPosInCtus+1 && wavefrontsEnabled ) + if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled ) { m_entropyCodingSyncContextState = cabacReader.getCtx(); } -#endif - if( isLastCtuOfSliceSegment ) + if( ctuIdx == slice->getNumCtuInSlice()-1 ) { + unsigned binVal = cabacReader.terminating_bit(); + CHECK( !binVal, "Expecting a terminating bit" ); #if DECODER_CHECK_SUBSTREAM_AND_SLICE_TRAILING_BYTES cabacReader.remaining_bytes( false ); -#endif -#if HEVC_DEPENDENT_SLICES - if( !slice->getDependentSliceSegmentFlag() ) - { -#endif - slice->setSliceCurEndCtuTsAddr( ctuTsAddr+1 ); -#if HEVC_DEPENDENT_SLICES - } - slice->setSliceSegmentCurEndCtuTsAddr( ctuTsAddr+1 ); #endif } -#if HEVC_TILES_WPP - else if( ( ctuXPosInCtus + 
1 == tileXPosInCtus + currentTile.getTileWidthInCtus () ) && - ( ctuYPosInCtus + 1 == tileYPosInCtus + currentTile.getTileHeightInCtus() || wavefrontsEnabled ) ) + else if( ( ctuXPosInCtus + 1 == tileXPosInCtus + tileColWidth ) && + ( ctuYPosInCtus + 1 == tileYPosInCtus + tileRowHeight || wavefrontsEnabled ) ) { // The sub-stream/stream should be terminated after this CTU. // (end of slice-segment, end of tile, end of wavefront-CTU-row) @@ -286,17 +224,10 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb #if DECODER_CHECK_SUBSTREAM_AND_SLICE_TRAILING_BYTES cabacReader.remaining_bytes( true ); #endif + subStrmId++; } -#endif } - CHECK( !isLastCtuOfSliceSegment, "Last CTU of slice segment not signalled as such" ); -#if HEVC_DEPENDENT_SLICES - if( depSliceSegmentsEnabled ) - { - m_lastSliceSegmentEndContextState = cabacReader.getCtx(); //ctx end of dep.slice - } -#endif // deallocate all created substreams, including internal buffers. for( auto substr: ppcSubstreams ) { diff --git a/source/Lib/DecoderLib/DecSlice.h b/source/Lib/DecoderLib/DecSlice.h index c34b8a4d570f32472b89dc9a95d367b80d6f24e4..1ff2a2282be31abb227e326a9167d515ffffe64d 100644 --- a/source/Lib/DecoderLib/DecSlice.h +++ b/source/Lib/DecoderLib/DecSlice.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,12 +62,7 @@ private: CABACDecoder* m_CABACDecoder; DecCu* m_pcCuDecoder; -#if HEVC_DEPENDENT_SLICES - Ctx m_lastSliceSegmentEndContextState; ///< context storage for state at the end of the previous slice-segment (used for dependent slices only). 
-#endif -#if HEVC_TILES_WPP Ctx m_entropyCodingSyncContextState; ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row -#endif public: DecSlice(); diff --git a/source/Lib/DecoderLib/NALread.cpp b/source/Lib/DecoderLib/NALread.cpp index 7b507b99fbe67d13581ea4bdcf2b7bc48f749907..07dddd03f67e8834f26b88a790f14d2d247b4b5b 100644 --- a/source/Lib/DecoderLib/NALread.cpp +++ b/source/Lib/DecoderLib/NALread.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -110,11 +110,15 @@ static void convertPayloadToRBSP(vector<uint8_t>& nalUnitBuf, InputBitstream *bi static void xTraceNalUnitHeader(InputNALUnit& nalu) { DTRACE( g_trace_ctx, D_NALUNITHEADER, "*********** NAL UNIT (%s) ***********\n", nalUnitTypeToString(nalu.m_nalUnitType) ); - - DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "forbidden_zero_bit", 1, 0 ); - DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "nal_unit_type", 6, nalu.m_nalUnitType ); - DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "nuh_layer_id", 6, nalu.m_nuhLayerId ); + bool zeroTidRequiredFlag = 0; + if((nalu.m_nalUnitType >= 16) && (nalu.m_nalUnitType <= 31)) { + zeroTidRequiredFlag = 1; + } + DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "zero_tid_required_flag", 1, zeroTidRequiredFlag ); DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "nuh_temporal_id_plus1", 3, nalu.m_temporalId + 1 ); + DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "nal_unit_type_lsb", 4, (nalu.m_nalUnitType) - (zeroTidRequiredFlag << 4)); + DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : %u\n", "nuh_layer_id_plus1", 7, nalu.m_nuhLayerId+1); + DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d) : 
%u\n", "nuh_reserved_zero_bit", 1, 0 ); } #endif @@ -122,13 +126,15 @@ void readNalUnitHeader(InputNALUnit& nalu) { InputBitstream& bs = nalu.getBitstream(); - bool forbidden_zero_bit = bs.read(1); // forbidden_zero_bit - if(forbidden_zero_bit != 0) { THROW( "Forbidden zero-bit not '0'" );} - nalu.m_nalUnitType = (NalUnitType) bs.read(6); // nal_unit_type - nalu.m_nuhLayerId = bs.read(6); // nuh_layer_id - nalu.m_temporalId = bs.read(3) - 1; // nuh_temporal_id_plus1 + nalu.m_forbiddenZeroBit = bs.read(1); // forbidden zero bit + nalu.m_nuhReservedZeroBit = bs.read(1); // nuh_reserved_zero_bit + nalu.m_nuhLayerId = bs.read(6); // nuh_layer_id + CHECK(nalu.m_nuhLayerId > 55, "The value of nuh_layer_id shall be in the range of 0 to 55, inclusive"); + nalu.m_nalUnitType = (NalUnitType) bs.read(5); // nal_unit_type + nalu.m_temporalId = bs.read(3) - 1; // nuh_temporal_id_plus1 + #if RExt__DECODER_DEBUG_BIT_STATISTICS - CodingStatistics::IncrementStatisticEP(STATS__NAL_UNIT_HEADER_BITS, 1+6+6+3, 0); + CodingStatistics::IncrementStatisticEP(STATS__NAL_UNIT_HEADER_BITS, 1+3+4+7+1, 0); #endif #if ENABLE_TRACING @@ -140,65 +146,11 @@ void readNalUnitHeader(InputNALUnit& nalu) { if ( nalu.m_temporalId ) { -#if HEVC_VPS -#if !JVET_M0101_HLS - CHECK( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA - || nalu.m_nalUnitType == NAL_UNIT_VPS - || nalu.m_nalUnitType == NAL_UNIT_SPS - || nalu.m_nalUnitType == NAL_UNIT_EOS - || nalu.m_nalUnitType == NAL_UNIT_EOB - , "Invalid NAL type" ); -#else - CHECK( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA - || nalu.m_nalUnitType == NAL_UNIT_VPS 
- || nalu.m_nalUnitType == NAL_UNIT_SPS - || nalu.m_nalUnitType == NAL_UNIT_EOS - || nalu.m_nalUnitType == NAL_UNIT_EOB - , "Invalid NAL type" ); -#endif -#else -#if !JVET_M0101_HLS - CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA - || nalu.m_nalUnitType == NAL_UNIT_SPS - || nalu.m_nalUnitType == NAL_UNIT_EOS - || nalu.m_nalUnitType == NAL_UNIT_EOB - , "Invalid NAL type"); -#else - CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA - || nalu.m_nalUnitType == NAL_UNIT_SPS - || nalu.m_nalUnitType == NAL_UNIT_EOS - || nalu.m_nalUnitType == NAL_UNIT_EOB - , "Invalid NAL type"); -#endif -#endif - } else { -#if !JVET_M0101_HLS - CHECK( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_R - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_R - || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_N - , "Invalid NAL type" ); -#else CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA - , "Invalid NAL type"); -#endif + , "When NAL unit type is equal to STSA_NUT, TemporalId shall not be equal to 0"); } } } diff --git a/source/Lib/DecoderLib/NALread.h b/source/Lib/DecoderLib/NALread.h index 1b0ea21b90b26bcb62750b1c4ba0c52177b67a10..1778dc5055e3013313ed48bd88c6cc9bacad224f 100644 --- a/source/Lib/DecoderLib/NALread.h +++ b/source/Lib/DecoderLib/NALread.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/DecoderLib/SEIread.cpp b/source/Lib/DecoderLib/SEIread.cpp index 1495228f5d6cece183edb5f59a264976927dcbc1..f69dff292f77bae5f6b8918a117893ab5d3be481 100644 --- a/source/Lib/DecoderLib/SEIread.cpp +++ b/source/Lib/DecoderLib/SEIread.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,15 @@ //! \ingroup DecoderLib //! \{ +void SEIReader::sei_read_scode(std::ostream *pOS, uint32_t length, int& code, const char *pSymbolName) +{ + READ_SCODE(length, code, pSymbolName); + if (pOS) + { + (*pOS) << " " << std::setw(55) << pSymbolName << ": " << code << "\n"; + } +} + void SEIReader::sei_read_code(std::ostream *pOS, uint32_t uiLength, uint32_t& ruiCode, const char *pSymbolName) { READ_CODE(uiLength, ruiCode, pSymbolName); @@ -96,6 +105,7 @@ static inline void output_sei_message_header(SEI &sei, std::ostream *pDecodedMes } #undef READ_CODE +#undef READ_SCODE #undef READ_SVLC #undef READ_UVLC #undef READ_FLAG @@ -104,14 +114,15 @@ static inline void output_sei_message_header(SEI &sei, std::ostream *pDecodedMes /** * unmarshal a single SEI message from bitstream bs */ -void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream) + // note: for independent parsing no parameter set should not be required here +void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream) { setBitstream(bs); CHECK(m_pcBitstream->getNumBitsUntilByteAligned(), "Bitstream not aligned"); do { - xReadSEImessage(seis, 
nalUnitType, sps, pDecodedMessageOutputStream); + xReadSEImessage(seis, nalUnitType, temporalId, sps, hrd, pDecodedMessageOutputStream); /* SEI messages are an integer number of bytes, something has failed * in the parsing if bitstream not byte-aligned */ @@ -122,7 +133,7 @@ void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const Nal xReadRbspTrailingBits(); } -void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream) { #if ENABLE_TRACING xTraceSEIHeader(); @@ -157,6 +168,7 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType setBitstream(bs->extractSubstream(payloadSize * 8)); SEI *sei = NULL; + const SEIBufferingPeriod *bp = NULL; if(nalUnitType == NAL_UNIT_PREFIX_SEI) { @@ -166,116 +178,107 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType sei = new SEIuserDataUnregistered; xParseSEIuserDataUnregistered((SEIuserDataUnregistered&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::ACTIVE_PARAMETER_SETS: - sei = new SEIActiveParameterSets; - xParseSEIActiveParameterSets((SEIActiveParameterSets&) *sei, payloadSize, pDecodedMessageOutputStream); - break; case SEI::DECODING_UNIT_INFO: - if (!sps) + bp = hrd.getBufferingPeriodSEI(); + if (!bp) { - msg( WARNING, "Warning: Found Decoding unit SEI message, but no active SPS is available. Ignoring."); + msg( WARNING, "Warning: Found Decoding unit information SEI message, but no active buffering period is available. 
Ignoring."); } else { sei = new SEIDecodingUnitInfo; - xParseSEIDecodingUnitInfo((SEIDecodingUnitInfo&) *sei, payloadSize, sps, pDecodedMessageOutputStream); + xParseSEIDecodingUnitInfo((SEIDecodingUnitInfo&) *sei, payloadSize, *bp, temporalId, pDecodedMessageOutputStream); } break; case SEI::BUFFERING_PERIOD: - if (!sps) - { - msg( WARNING, "Warning: Found Buffering period SEI message, but no active SPS is available. Ignoring."); - } - else - { - sei = new SEIBufferingPeriod; - xParseSEIBufferingPeriod((SEIBufferingPeriod&) *sei, payloadSize, sps, pDecodedMessageOutputStream); - } + sei = new SEIBufferingPeriod; + xParseSEIBufferingPeriod((SEIBufferingPeriod&) *sei, payloadSize, pDecodedMessageOutputStream); + hrd.setBufferingPeriodSEI((SEIBufferingPeriod*) sei); break; case SEI::PICTURE_TIMING: - if (!sps) { - msg( WARNING, "Warning: Found Picture timing SEI message, but no active SPS is available. Ignoring."); - } - else - { - sei = new SEIPictureTiming; - xParseSEIPictureTiming((SEIPictureTiming&)*sei, payloadSize, sps, pDecodedMessageOutputStream); + bp = hrd.getBufferingPeriodSEI(); + if (!bp) + { + msg( WARNING, "Warning: Found Picture timing SEI message, but no active buffering period is available. 
Ignoring."); + } + else + { + sei = new SEIPictureTiming; + xParseSEIPictureTiming((SEIPictureTiming&)*sei, payloadSize, temporalId, *bp, pDecodedMessageOutputStream); + } } break; - case SEI::RECOVERY_POINT: - sei = new SEIRecoveryPoint; - xParseSEIRecoveryPoint((SEIRecoveryPoint&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::FRAME_FIELD_INFO: + sei = new SEIFrameFieldInfo; + xParseSEIFrameFieldinfo((SEIFrameFieldInfo&) *sei, payloadSize, pDecodedMessageOutputStream); + break; + case SEI::DEPENDENT_RAP_INDICATION: + sei = new SEIDependentRAPIndication; + xParseSEIDependentRAPIndication((SEIDependentRAPIndication&) *sei, payloadSize, pDecodedMessageOutputStream); break; case SEI::FRAME_PACKING: sei = new SEIFramePacking; xParseSEIFramePacking((SEIFramePacking&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::SEGM_RECT_FRAME_PACKING: - sei = new SEISegmentedRectFramePacking; - xParseSEISegmentedRectFramePacking((SEISegmentedRectFramePacking&) *sei, payloadSize, pDecodedMessageOutputStream); - break; - case SEI::DISPLAY_ORIENTATION: - sei = new SEIDisplayOrientation; - xParseSEIDisplayOrientation((SEIDisplayOrientation&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::MASTERING_DISPLAY_COLOUR_VOLUME: + sei = new SEIMasteringDisplayColourVolume; + xParseSEIMasteringDisplayColourVolume((SEIMasteringDisplayColourVolume&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::TEMPORAL_LEVEL0_INDEX: - sei = new SEITemporalLevel0Index; - xParseSEITemporalLevel0Index((SEITemporalLevel0Index&) *sei, payloadSize, pDecodedMessageOutputStream); +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI + case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: + sei = new SEIAlternativeTransferCharacteristics; + xParseSEIAlternativeTransferCharacteristics((SEIAlternativeTransferCharacteristics&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::REGION_REFRESH_INFO: - sei = new SEIGradualDecodingRefreshInfo; - 
xParseSEIRegionRefreshInfo((SEIGradualDecodingRefreshInfo&) *sei, payloadSize, pDecodedMessageOutputStream); +#endif + case SEI::EQUIRECTANGULAR_PROJECTION: + sei = new SEIEquirectangularProjection; + xParseSEIEquirectangularProjection((SEIEquirectangularProjection&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::NO_DISPLAY: - sei = new SEINoDisplay; - xParseSEINoDisplay((SEINoDisplay&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::SPHERE_ROTATION: + sei = new SEISphereRotation; + xParseSEISphereRotation((SEISphereRotation&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::TONE_MAPPING_INFO: - sei = new SEIToneMappingInfo; - xParseSEIToneMappingInfo((SEIToneMappingInfo&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::OMNI_VIEWPORT: + sei = new SEIOmniViewport; + xParseSEIOmniViewport((SEIOmniViewport&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::SOP_DESCRIPTION: - sei = new SEISOPDescription; - xParseSEISOPDescription((SEISOPDescription&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::REGION_WISE_PACKING: + sei = new SEIRegionWisePacking; + xParseSEIRegionWisePacking((SEIRegionWisePacking&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::SCALABLE_NESTING: - sei = new SEIScalableNesting; - xParseSEIScalableNesting((SEIScalableNesting&) *sei, nalUnitType, payloadSize, sps, pDecodedMessageOutputStream); + case SEI::GENERALIZED_CUBEMAP_PROJECTION: + sei = new SEIGeneralizedCubemapProjection; + xParseSEIGeneralizedCubemapProjection((SEIGeneralizedCubemapProjection&) *sei, payloadSize, pDecodedMessageOutputStream); break; -#if HEVC_TILES_WPP - case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS: - sei = new SEITempMotionConstrainedTileSets; - xParseSEITempMotionConstraintsTileSets((SEITempMotionConstrainedTileSets&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::SUBPICTURE_LEVEL_INFO: + sei = new SEISubpicureLevelInfo; + 
xParseSEISubpictureLevelInfo((SEISubpicureLevelInfo&) *sei, sps, payloadSize, pDecodedMessageOutputStream); break; -#endif - case SEI::TIME_CODE: - sei = new SEITimeCode; - xParseSEITimeCode((SEITimeCode&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::SAMPLE_ASPECT_RATIO_INFO: + sei = new SEISampleAspectRatioInfo; + xParseSEISampleAspectRatioInfo((SEISampleAspectRatioInfo&) *sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::CHROMA_RESAMPLING_FILTER_HINT: - sei = new SEIChromaResamplingFilterHint; - xParseSEIChromaResamplingFilterHint((SEIChromaResamplingFilterHint&) *sei, payloadSize, pDecodedMessageOutputStream); - //} + case SEI::USER_DATA_REGISTERED_ITU_T_T35: + sei = new SEIUserDataRegistered; + xParseSEIUserDataRegistered((SEIUserDataRegistered&)*sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::KNEE_FUNCTION_INFO: - sei = new SEIKneeFunctionInfo; - xParseSEIKneeFunctionInfo((SEIKneeFunctionInfo&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::FILM_GRAIN_CHARACTERISTICS: + sei = new SEIFilmGrainCharacteristics; + xParseSEIFilmGrainCharacteristics((SEIFilmGrainCharacteristics&)*sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::COLOUR_REMAPPING_INFO: - sei = new SEIColourRemappingInfo; - xParseSEIColourRemappingInfo((SEIColourRemappingInfo&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::CONTENT_LIGHT_LEVEL_INFO: + sei = new SEIContentLightLevelInfo; + xParseSEIContentLightLevelInfo((SEIContentLightLevelInfo&)*sei, payloadSize, pDecodedMessageOutputStream); break; - case SEI::MASTERING_DISPLAY_COLOUR_VOLUME: - sei = new SEIMasteringDisplayColourVolume; - xParseSEIMasteringDisplayColourVolume((SEIMasteringDisplayColourVolume&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::AMBIENT_VIEWING_ENVIRONMENT: + sei = new SEIAmbientViewingEnvironment; + xParseSEIAmbientViewingEnvironment((SEIAmbientViewingEnvironment&)*sei, payloadSize, pDecodedMessageOutputStream); 
break; -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI - case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: - sei = new SEIAlternativeTransferCharacteristics; - xParseSEIAlternativeTransferCharacteristics((SEIAlternativeTransferCharacteristics&) *sei, payloadSize, pDecodedMessageOutputStream); + case SEI::CONTENT_COLOUR_VOLUME: + sei = new SEIContentColourVolume; + xParseSEIContentColourVolume((SEIContentColourVolume&)*sei, payloadSize, pDecodedMessageOutputStream); break; -#endif default: for (uint32_t i = 0; i < payloadSize; i++) { @@ -294,18 +297,22 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType { switch (payloadType) { +#if HEVC_SEI case SEI::USER_DATA_UNREGISTERED: sei = new SEIuserDataUnregistered; xParseSEIuserDataUnregistered((SEIuserDataUnregistered&) *sei, payloadSize, pDecodedMessageOutputStream); break; +#endif case SEI::DECODED_PICTURE_HASH: sei = new SEIDecodedPictureHash; xParseSEIDecodedPictureHash((SEIDecodedPictureHash&) *sei, payloadSize, pDecodedMessageOutputStream); break; +#if HEVC_SEI case SEI::GREEN_METADATA: sei = new SEIGreenMetadataInfo; xParseSEIGreenMetadataInfo((SEIGreenMetadataInfo&) *sei, payloadSize, pDecodedMessageOutputStream); break; +#endif default: for (uint32_t i = 0; i < payloadSize; i++) { @@ -450,14 +457,12 @@ void SEIReader::xParseSEIDecodedPictureHash(SEIDecodedPictureHash& sei, uint32_t } } +#if HEVC_SEI void SEIReader::xParseSEIActiveParameterSets(SEIActiveParameterSets& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); -#if HEVC_VPS - sei_read_code( pDecodedMessageOutputStream, 4, val, "active_video_parameter_set_id"); sei.activeVPSId = val; -#endif sei_read_flag( pDecodedMessageOutputStream, val, "self_contained_cvs_flag"); sei.m_selfContainedCvsFlag = (val != 0); sei_read_flag( pDecodedMessageOutputStream, val, "no_parameter_set_update_flag"); 
sei.m_noParameterSetUpdateFlag = (val != 0); sei_read_uvlc( pDecodedMessageOutputStream, val, "num_sps_ids_minus1"); sei.numSpsIdsMinus1 = val; @@ -465,153 +470,276 @@ void SEIReader::xParseSEIActiveParameterSets(SEIActiveParameterSets& sei, uint32 sei.activeSeqParameterSetId.resize(sei.numSpsIdsMinus1 + 1); for (int i=0; i < (sei.numSpsIdsMinus1 + 1); i++) { - sei_read_uvlc( pDecodedMessageOutputStream, val, "active_seq_parameter_set_id[i]"); sei.activeSeqParameterSetId[i] = val; + sei_read_code( pDecodedMessageOutputStream, 4, val, "active_seq_parameter_set_id[i]" ); sei.activeSeqParameterSetId[i] = val; } } +#endif -void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t payloadSize, const SEIBufferingPeriod& bp, const uint32_t temporalId, std::ostream *pDecodedMessageOutputStream) { uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); sei_read_uvlc( pDecodedMessageOutputStream, val, "decoding_unit_idx"); sei.m_decodingUnitIdx = val; - const VUI *vui = sps->getVuiParameters(); - if(vui->getHrdParameters()->getSubPicCpbParamsInPicTimingSEIFlag()) + if(!bp.m_decodingUnitCpbParamsInPicTimingSeiFlag) { - sei_read_code( pDecodedMessageOutputStream, ( vui->getHrdParameters()->getDuCpbRemovalDelayLengthMinus1() + 1 ), val, "du_spt_cpb_removal_delay_increment"); - sei.m_duSptCpbRemovalDelay = val; + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) + { + sei_read_flag( pDecodedMessageOutputStream, val, "dui_sub_layer_delays_present_flag[i]" ); + sei.m_duiSubLayerDelaysPresentFlag[i] = val; + if( sei.m_duiSubLayerDelaysPresentFlag[i] ) + { + sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), val, "du_spt_cpb_removal_delay_increment[i]"); + sei.m_duSptCpbRemovalDelayIncrement[i] = val; + } + else + { + 
sei.m_duSptCpbRemovalDelayIncrement[i] = 0; + } + } } else { - sei.m_duSptCpbRemovalDelay = 0; + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) + { + sei.m_duSptCpbRemovalDelayIncrement[i] = 0; + } } sei_read_flag( pDecodedMessageOutputStream, val, "dpb_output_du_delay_present_flag"); sei.m_dpbOutputDuDelayPresentFlag = (val != 0); if(sei.m_dpbOutputDuDelayPresentFlag) { - sei_read_code( pDecodedMessageOutputStream, vui->getHrdParameters()->getDpbOutputDelayDuLengthMinus1() + 1, val, "pic_spt_dpb_output_du_delay"); + sei_read_code( pDecodedMessageOutputStream, bp.getDpbOutputDelayDuLength(), val, "pic_spt_dpb_output_du_delay"); sei.m_picSptDpbOutputDuDelay = val; } } -void SEIReader::xParseSEIBufferingPeriod(SEIBufferingPeriod& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIBufferingPeriod(SEIBufferingPeriod& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { int i, nalOrVcl; uint32_t code; - const VUI *pVUI = sps->getVuiParameters(); - const HRD *pHRD = pVUI->getHrdParameters(); output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_uvlc( pDecodedMessageOutputStream, code, "bp_seq_parameter_set_id" ); sei.m_bpSeqParameterSetId = code; - if( !pHRD->getSubPicCpbParamsPresentFlag() ) + sei_read_flag( pDecodedMessageOutputStream, code, "bp_nal_hrd_parameters_present_flag" ); sei.m_bpNalCpbParamsPresentFlag = code; + sei_read_flag( pDecodedMessageOutputStream, code, "bp_vcl_hrd_parameters_present_flag" ); sei.m_bpVclCpbParamsPresentFlag = code; + + sei_read_code( pDecodedMessageOutputStream, 5, code, "initial_cpb_removal_delay_length_minus1" ); sei.m_initialCpbRemovalDelayLength = code + 1; + sei_read_code( pDecodedMessageOutputStream, 5, code, "cpb_removal_delay_length_minus1" ); sei.m_cpbRemovalDelayLength = code + 1; + sei_read_code( pDecodedMessageOutputStream, 5, code, "dpb_output_delay_length_minus1" ); sei.m_dpbOutputDelayLength = 
code + 1; + sei_read_flag( pDecodedMessageOutputStream, code, "alt_cpb_params_present_flag"); sei.m_altCpbParamsPresentFlag = code; + sei_read_flag( pDecodedMessageOutputStream, code, "bp_decoding_unit_hrd_params_present_flag" ); sei.m_bpDecodingUnitHrdParamsPresentFlag = code; + if( sei.m_bpDecodingUnitHrdParamsPresentFlag ) { - sei_read_flag( pDecodedMessageOutputStream, code, "irap_cpb_params_present_flag" ); sei.m_rapCpbParamsPresentFlag = code; + sei_read_code( pDecodedMessageOutputStream, 5, code, "du_cpb_removal_delay_increment_length_minus1" ); sei.m_duCpbRemovalDelayIncrementLength = code + 1; + sei_read_code( pDecodedMessageOutputStream, 5, code, "dpb_output_delay_du_length_minus1" ); sei.m_dpbOutputDelayDuLength = code + 1; + sei_read_flag( pDecodedMessageOutputStream, code, "decoding_unit_cpb_params_in_pic_timing_sei_flag" ); sei.m_decodingUnitCpbParamsInPicTimingSeiFlag = code; } - if( sei.m_rapCpbParamsPresentFlag ) + else { - sei_read_code( pDecodedMessageOutputStream, pHRD->getCpbRemovalDelayLengthMinus1() + 1, code, "cpb_delay_offset" ); sei.m_cpbDelayOffset = code; - sei_read_code( pDecodedMessageOutputStream, pHRD->getDpbOutputDelayLengthMinus1() + 1, code, "dpb_delay_offset" ); sei.m_dpbDelayOffset = code; + sei.m_duCpbRemovalDelayIncrementLength = 24; + sei.m_dpbOutputDelayDuLength = 24; } - //read splicing flag and cpb_removal_delay_delta sei_read_flag( pDecodedMessageOutputStream, code, "concatenation_flag"); sei.m_concatenationFlag = code; - sei_read_code( pDecodedMessageOutputStream, ( pHRD->getCpbRemovalDelayLengthMinus1() + 1 ), code, "au_cpb_removal_delay_delta_minus1" ); - sei.m_auCpbRemovalDelayDelta = code + 1; + sei_read_flag ( pDecodedMessageOutputStream, code, "additional_concatenation_info_present_flag"); + sei.m_additionalConcatenationInfoPresentFlag = code; + if (sei.m_additionalConcatenationInfoPresentFlag) + { + sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, 
"max_initial_removal_delay_for_concatenation" ); + sei.m_maxInitialRemovalDelayForConcatenation = code; + } - for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) + sei_read_code( pDecodedMessageOutputStream, ( sei.m_cpbRemovalDelayLength ), code, "au_cpb_removal_delay_delta_minus1" ); + sei.m_auCpbRemovalDelayDelta = code + 1; + sei_read_flag( pDecodedMessageOutputStream, code, "cpb_removal_delay_deltas_present_flag" ); sei.m_cpbRemovalDelayDeltasPresentFlag = code; + if (sei.m_cpbRemovalDelayDeltasPresentFlag) + { + sei_read_uvlc( pDecodedMessageOutputStream, code, "num_cpb_removal_delay_deltas_minus1" ); sei.m_numCpbRemovalDelayDeltas = code + 1; + for( i = 0; i < sei.m_numCpbRemovalDelayDeltas; i ++ ) + { + sei_read_code( pDecodedMessageOutputStream, ( sei.m_cpbRemovalDelayLength ), code, "cpb_removal_delay_delta[i]" ); + sei.m_cpbRemovalDelayDelta[ i ] = code; + } + } + sei_read_code( pDecodedMessageOutputStream, 3, code, "bp_max_sub_layers_minus1" ); sei.m_bpMaxSubLayers = code + 1; + sei_read_uvlc( pDecodedMessageOutputStream, code, "bp_cpb_cnt_minus1" ); sei.m_bpCpbCnt = code + 1; + sei_read_flag(pDecodedMessageOutputStream, code, "sublayer_initial_cpb_removal_delay_present_flag"); + sei.m_sublayerInitialCpbRemovalDelayPresentFlag = code; + for (i = (sei.m_sublayerInitialCpbRemovalDelayPresentFlag ? 
0 : sei.m_bpMaxSubLayers - 1); i < sei.m_bpMaxSubLayers; i++) { - if( ( ( nalOrVcl == 0 ) && ( pHRD->getNalHrdParametersPresentFlag() ) ) || - ( ( nalOrVcl == 1 ) && ( pHRD->getVclHrdParametersPresentFlag() ) ) ) + for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) { - for( i = 0; i < ( pHRD->getCpbCntMinus1( 0 ) + 1 ); i ++ ) + if( ( ( nalOrVcl == 0 ) && ( sei.m_bpNalCpbParamsPresentFlag ) ) || + ( ( nalOrVcl == 1 ) && ( sei.m_bpVclCpbParamsPresentFlag ) ) ) { - sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_cpb_removal_delay":"nal_initial_cpb_removal_delay" ); - sei.m_initialCpbRemovalDelay[i][nalOrVcl] = code; - sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_cpb_removal_offset":"nal_initial_cpb_removal_offset" ); - sei.m_initialCpbRemovalDelayOffset[i][nalOrVcl] = code; - if( pHRD->getSubPicCpbParamsPresentFlag() || sei.m_rapCpbParamsPresentFlag ) + for( int j = 0; j < ( sei.m_bpCpbCnt ); j ++ ) { - sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_alt_cpb_removal_delay":"nal_initial_alt_cpb_removal_delay" ); - sei.m_initialAltCpbRemovalDelay[i][nalOrVcl] = code; - sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_alt_cpb_removal_offset":"nal_initial_alt_cpb_removal_offset" ); - sei.m_initialAltCpbRemovalDelayOffset[i][nalOrVcl] = code; + sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? "vcl_initial_cpb_removal_delay[i][j]" : "nal_initial_cpb_removal_delay[i][j]" ); + sei.m_initialCpbRemovalDelay[i][j][nalOrVcl] = code; + sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? 
"vcl_initial_cpb_removal_offset[i][j]" : "nal_initial_cpb_removal_offset[i][j]" ); + sei.m_initialCpbRemovalDelay[i][j][nalOrVcl] = code; } } } } + if (sei.m_altCpbParamsPresentFlag) + { + sei_read_flag(pDecodedMessageOutputStream, code, "use_alt_cpb_params_flag"); sei.m_useAltCpbParamsFlag = code; + } + } -void SEIReader::xParseSEIPictureTiming(SEIPictureTiming& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIPictureTiming(SEIPictureTiming& sei, uint32_t payloadSize, const uint32_t temporalId, const SEIBufferingPeriod& bp, std::ostream *pDecodedMessageOutputStream) { - int i; - uint32_t code; - const VUI *vui = sps->getVuiParameters(); - const HRD *hrd = vui->getHrdParameters(); output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - if( vui->getFrameFieldInfoPresentFlag() ) + uint32_t symbol; + sei_read_code( pDecodedMessageOutputStream, bp.m_cpbRemovalDelayLength, symbol, "cpb_removal_delay_minus1[bp_max_sub_layers_minus1]" ); + sei.m_auCpbRemovalDelay[bp.m_bpMaxSubLayers - 1] = symbol + 1; + + if( bp.m_altCpbParamsPresentFlag ) { - sei_read_code( pDecodedMessageOutputStream, 4, code, "pic_struct" ); sei.m_picStruct = code; - sei_read_code( pDecodedMessageOutputStream, 2, code, "source_scan_type" ); sei.m_sourceScanType = code; - sei_read_flag( pDecodedMessageOutputStream, code, "duplicate_flag" ); sei.m_duplicateFlag = (code == 1); + sei_read_flag( pDecodedMessageOutputStream, symbol, "cpb_alt_timing_info_present_flag" ); sei.m_cpbAltTimingInfoPresentFlag = symbol; + if( sei.m_cpbAltTimingInfoPresentFlag ) + { + sei.m_cpbAltInitialCpbRemovalDelayDelta.resize(bp.m_bpCpbCnt); + sei.m_cpbAltInitialCpbRemovalOffsetDelta.resize(bp.m_bpCpbCnt); + for( int i = 0; i < bp.m_bpCpbCnt; i++ ) + { + sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, symbol, "cpb_alt_initial_cpb_removal_delay_delta[ i ]" ); + sei.m_cpbAltInitialCpbRemovalDelayDelta[i]= symbol; + 
sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, symbol, "cpb_alt_initial_cpb_removal_offset_delta[ i ]" ); + sei.m_cpbAltInitialCpbRemovalOffsetDelta[i]= symbol; + } + sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, sei.m_cpbDelayOffset, "cpb_delay_offset" ); + sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, sei.m_dpbDelayOffset, "dpb_delay_offset" ); + } } - - if( hrd->getCpbDpbDelaysPresentFlag()) + else { - sei_read_code( pDecodedMessageOutputStream, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ), code, "au_cpb_removal_delay_minus1" ); - sei.m_auCpbRemovalDelay = code + 1; - sei_read_code( pDecodedMessageOutputStream, ( hrd->getDpbOutputDelayLengthMinus1() + 1 ), code, "pic_dpb_output_delay" ); - sei.m_picDpbOutputDelay = code; + sei.m_cpbAltTimingInfoPresentFlag = false; + sei.m_cpbDelayOffset = sei.m_dpbDelayOffset = 0; + } - if(hrd->getSubPicCpbParamsPresentFlag()) + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) + { + sei_read_flag( pDecodedMessageOutputStream, symbol, "pt_sub_layer_delays_present_flag[i]" ); sei.m_ptSubLayerDelaysPresentFlag[i] = (symbol == 1); + if( sei.m_ptSubLayerDelaysPresentFlag[ i ] ) { - sei_read_code( pDecodedMessageOutputStream, hrd->getDpbOutputDelayDuLengthMinus1()+1, code, "pic_dpb_output_du_delay" ); - sei.m_picDpbOutputDuDelay = code; + if (bp.m_cpbRemovalDelayDeltasPresentFlag) + { + sei_read_flag(pDecodedMessageOutputStream, symbol, "cpb_removal_delay_delta_enabled_flag[i]"); + sei.m_cpbRemovalDelayDeltaEnabledFlag[i] = (symbol == 1); + } + else + { + sei.m_cpbRemovalDelayDeltaEnabledFlag[i] = false; + } + if( sei.m_cpbRemovalDelayDeltaEnabledFlag[ i ] ) + { + sei_read_code( pDecodedMessageOutputStream, ceilLog2(bp.m_numCpbRemovalDelayDeltas), symbol, "cpb_removal_delay_delta_idx[i]" ); + sei.m_cpbRemovalDelayDeltaIdx[ i ] = symbol; + } + else + { + sei_read_code( pDecodedMessageOutputStream, bp.m_cpbRemovalDelayLength, 
symbol, "cpb_removal_delay_minus1[i]" ); + sei.m_auCpbRemovalDelay[ i ] = symbol + 1; + } } - - if( hrd->getSubPicCpbParamsPresentFlag() && hrd->getSubPicCpbParamsInPicTimingSEIFlag() ) + } + sei_read_code( pDecodedMessageOutputStream, bp.m_dpbOutputDelayLength, symbol, "dpb_output_delay" ); + sei.m_picDpbOutputDelay = symbol; + if( bp.m_bpDecodingUnitHrdParamsPresentFlag ) + { + sei_read_code( pDecodedMessageOutputStream, bp.getDpbOutputDelayDuLength(), symbol, "pic_dpb_output_du_delay" ); + sei.m_picDpbOutputDuDelay = symbol; + } + if( bp.m_bpDecodingUnitHrdParamsPresentFlag && bp.m_decodingUnitCpbParamsInPicTimingSeiFlag ) + { + sei_read_uvlc( pDecodedMessageOutputStream, symbol, "num_decoding_units_minus1" ); + sei.m_numDecodingUnitsMinus1 = symbol; + sei.m_numNalusInDuMinus1.resize(sei.m_numDecodingUnitsMinus1 + 1 ); + sei.m_duCpbRemovalDelayMinus1.resize( (sei.m_numDecodingUnitsMinus1 + 1) * bp.m_bpMaxSubLayers ); + + sei_read_flag( pDecodedMessageOutputStream, symbol, "du_common_cpb_removal_delay_flag" ); + sei.m_duCommonCpbRemovalDelayFlag = symbol; + if( sei.m_duCommonCpbRemovalDelayFlag ) { - sei_read_uvlc( pDecodedMessageOutputStream, code, "num_decoding_units_minus1"); - sei.m_numDecodingUnitsMinus1 = code; - sei_read_flag( pDecodedMessageOutputStream, code, "du_common_cpb_removal_delay_flag" ); - sei.m_duCommonCpbRemovalDelayFlag = code; - if( sei.m_duCommonCpbRemovalDelayFlag ) + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) { - sei_read_code( pDecodedMessageOutputStream, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), code, "du_common_cpb_removal_delay_increment_minus1" ); - sei.m_duCommonCpbRemovalDelayMinus1 = code; + if( sei.m_ptSubLayerDelaysPresentFlag[i] ) + { + sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), symbol, "du_common_cpb_removal_delay_increment_minus1[i]" ); + sei.m_duCommonCpbRemovalDelayMinus1[i] = symbol; + } } - sei.m_numNalusInDuMinus1.resize(sei.m_numDecodingUnitsMinus1 + 1 
); - sei.m_duCpbRemovalDelayMinus1.resize( sei.m_numDecodingUnitsMinus1 + 1 ); - - for( i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ ) + } + for( int i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ ) + { + sei_read_uvlc( pDecodedMessageOutputStream, symbol, "num_nalus_in_du_minus1[i]" ); + sei.m_numNalusInDuMinus1[i] = symbol; + if( !sei.m_duCommonCpbRemovalDelayFlag && i < sei.m_numDecodingUnitsMinus1 ) { - sei_read_uvlc( pDecodedMessageOutputStream, code, "num_nalus_in_du_minus1[i]"); - sei.m_numNalusInDuMinus1[ i ] = code; - if( ( !sei.m_duCommonCpbRemovalDelayFlag ) && ( i < sei.m_numDecodingUnitsMinus1 ) ) + for( int j = temporalId; j < bp.m_bpMaxSubLayers - 1; j ++ ) { - sei_read_code( pDecodedMessageOutputStream, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), code, "du_cpb_removal_delay_minus1[i]" ); - sei.m_duCpbRemovalDelayMinus1[ i ] = code; + if( sei.m_ptSubLayerDelaysPresentFlag[j] ) + { + sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), symbol, "du_cpb_removal_delay_increment_minus1[i][j]" ); + sei.m_duCpbRemovalDelayMinus1[i * bp.m_bpMaxSubLayers + j] = symbol; + } } } } } } -void SEIReader::xParseSEIRecoveryPoint(SEIRecoveryPoint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIFrameFieldinfo(SEIFrameFieldInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - int iCode; - uint32_t uiCode; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_svlc( pDecodedMessageOutputStream, iCode, "recovery_poc_cnt" ); sei.m_recoveryPocCnt = iCode; - sei_read_flag( pDecodedMessageOutputStream, uiCode, "exact_matching_flag" ); sei.m_exactMatchingFlag = uiCode; - sei_read_flag( pDecodedMessageOutputStream, uiCode, "broken_link_flag" ); sei.m_brokenLinkFlag = uiCode; + uint32_t symbol; + sei_read_flag( pDecodedMessageOutputStream, symbol, "field_pic_flag" ); + sei.m_fieldPicFlag= symbol; + if (sei.m_fieldPicFlag) + { + 
sei_read_flag( pDecodedMessageOutputStream, symbol, "bottom_field_flag" ); + sei.m_bottomFieldFlag = symbol; + sei_read_flag( pDecodedMessageOutputStream, symbol, "pairing_indicated_flag" ); + sei.m_pairingIndicatedFlag = symbol; + if (sei.m_pairingIndicatedFlag) + { + sei_read_flag( pDecodedMessageOutputStream, symbol, "paired_with_next_field_flag" ); + sei.m_pairedWithNextFieldFlag = symbol; + } + } + else + { + sei_read_flag( pDecodedMessageOutputStream, symbol, "display_fields_from_frame_flag" ); + sei.m_displayFieldsFromFrameFlag = symbol; + if (sei.m_displayFieldsFromFrameFlag) + { + sei_read_flag( pDecodedMessageOutputStream, symbol, "display_fields_from_frame_flag" ); + sei.m_topFieldFirstFlag = symbol; + } + sei_read_uvlc( pDecodedMessageOutputStream, symbol, "display_elemental_periods_minus1" ); + sei.m_displayElementalPeriodsMinus1 = symbol; + } + sei_read_code( pDecodedMessageOutputStream, 2, symbol, "source_scan_type" ); + sei.m_sourceScanType = symbol; + sei_read_flag( pDecodedMessageOutputStream, symbol, "duplicate_flag" ); + sei.m_duplicateFlag = symbol; +} + +void SEIReader::xParseSEIDependentRAPIndication( SEIDependentRAPIndication& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream ) +{ + output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); } + void SEIReader::xParseSEIFramePacking(SEIFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { uint32_t val; @@ -648,540 +776,398 @@ void SEIReader::xParseSEIFramePacking(SEIFramePacking& sei, uint32_t payloadSize } sei_read_flag( pDecodedMessageOutputStream, val, "upsampled_aspect_ratio_flag" ); sei.m_upsampledAspectRatio = val; } - -void SEIReader::xParseSEISegmentedRectFramePacking(SEISegmentedRectFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume& sei, uint32_t payloadSize, std::ostream 
*pDecodedMessageOutputStream) { - uint32_t val; + uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_flag( pDecodedMessageOutputStream, val, "segmented_rect_frame_packing_arrangement_cancel_flag" ); sei.m_arrangementCancelFlag = val; - if( !sei.m_arrangementCancelFlag ) - { - sei_read_code( pDecodedMessageOutputStream, 2, val, "segmented_rect_content_interpretation_type" ); sei.m_contentInterpretationType = val; - sei_read_flag( pDecodedMessageOutputStream, val, "segmented_rect_frame_packing_arrangement_persistence" ); sei.m_arrangementPersistenceFlag = val; - } + + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[0]" ); sei.values.primaries[0][0] = code; + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[0]" ); sei.values.primaries[0][1] = code; + + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[1]" ); sei.values.primaries[1][0] = code; + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[1]" ); sei.values.primaries[1][1] = code; + + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[2]" ); sei.values.primaries[2][0] = code; + sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[2]" ); sei.values.primaries[2][1] = code; + + + sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_x" ); sei.values.whitePoint[0] = code; + sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_y" ); sei.values.whitePoint[1] = code; + + sei_read_code( pDecodedMessageOutputStream, 32, code, "max_display_mastering_luminance" ); sei.values.maxLuminance = code; + sei_read_code( pDecodedMessageOutputStream, 32, code, "min_display_mastering_luminance" ); sei.values.minLuminance = code; } -void SEIReader::xParseSEIDisplayOrientation(SEIDisplayOrientation& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +#if 
U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI +void SEIReader::xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream) +{ + uint32_t code; + output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); + + sei_read_code(pDecodedMessageOutputStream, 8, code, "preferred_transfer_characteristics"); sei.m_preferredTransferCharacteristics = code; +} +#endif +void SEIReader::xParseSEIUserDataRegistered(SEIUserDataRegistered& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_flag( pDecodedMessageOutputStream, val, "display_orientation_cancel_flag" ); sei.cancelFlag = val; - if( !sei.cancelFlag ) + + uint32_t code; + assert(payloadSize>0); + sei_read_code(pDecodedMessageOutputStream, 8, code, "itu_t_t35_country_code"); payloadSize--; + if (code == 255) { - sei_read_flag( pDecodedMessageOutputStream, val, "hor_flip" ); sei.horFlip = val; - sei_read_flag( pDecodedMessageOutputStream, val, "ver_flip" ); sei.verFlip = val; - sei_read_code( pDecodedMessageOutputStream, 16, val, "anticlockwise_rotation" ); sei.anticlockwiseRotation = val; - sei_read_flag( pDecodedMessageOutputStream, val, "display_orientation_persistence_flag" ); sei.persistenceFlag = val; + assert(payloadSize>0); + sei_read_code(pDecodedMessageOutputStream, 8, code, "itu_t_t35_country_code_extension_byte"); payloadSize--; + code += 255; + } + sei.m_ituCountryCode = code; + sei.m_userData.resize(payloadSize); + for (uint32_t i = 0; i < sei.m_userData.size(); i++) + { + sei_read_code(NULL, 8, code, "itu_t_t35_payload_byte"); + sei.m_userData[i] = code; + } + if (pDecodedMessageOutputStream) + { + (*pDecodedMessageOutputStream) << " itu_t_t35 payload size: " << sei.m_userData.size() << "\n"; } } -void SEIReader::xParseSEITemporalLevel0Index(SEITemporalLevel0Index& sei, uint32_t 
payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t val; + uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_code( pDecodedMessageOutputStream, 8, val, "temporal_sub_layer_zero_idx" ); sei.tl0Idx = val; - sei_read_code( pDecodedMessageOutputStream, 8, val, "irap_pic_id" ); sei.rapIdx = val; + + sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_characteristics_cancel_flag"); sei.m_filmGrainCharacteristicsCancelFlag = code != 0; + if (!sei.m_filmGrainCharacteristicsCancelFlag) + { + sei_read_code(pDecodedMessageOutputStream, 2, code, "film_grain_model_id"); sei.m_filmGrainModelId = code; + sei_read_flag(pDecodedMessageOutputStream, code, "separate_colour_description_present_flag"); sei.m_separateColourDescriptionPresentFlag = code != 0; + if (sei.m_separateColourDescriptionPresentFlag) + { + sei_read_code(pDecodedMessageOutputStream, 3, code, "film_grain_bit_depth_luma_minus8"); sei.m_filmGrainBitDepthLumaMinus8 = code; + sei_read_code(pDecodedMessageOutputStream, 3, code, "film_grain_bit_depth_chroma_minus8"); sei.m_filmGrainBitDepthChromaMinus8 = code; + sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_full_range_flag"); sei.m_filmGrainFullRangeFlag = code != 0; + sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_colour_primaries"); sei.m_filmGrainColourPrimaries = code; + sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_transfer_characteristics"); sei.m_filmGrainTransferCharacteristics = code; + sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_matrix_coeffs"); sei.m_filmGrainMatrixCoeffs = code; + } + sei_read_code(pDecodedMessageOutputStream, 2, code, "blending_mode_id"); sei.m_blendingModeId = code; + sei_read_code(pDecodedMessageOutputStream, 4, code, "log2_scale_factor"); 
sei.m_log2ScaleFactor = code; + for (int c = 0; c<3; c++) + { + sei_read_flag(pDecodedMessageOutputStream, code, "comp_model_present_flag[c]"); sei.m_compModel[c].presentFlag = code != 0; + } + for (int c = 0; c<3; c++) + { + SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c]; + if (cm.presentFlag) + { + uint32_t numIntensityIntervals; + sei_read_code(pDecodedMessageOutputStream, 8, code, "num_intensity_intervals_minus1[c]"); numIntensityIntervals = code + 1; + sei_read_code(pDecodedMessageOutputStream, 3, code, "num_model_values_minus1[c]"); cm.numModelValues = code + 1; + cm.intensityValues.resize(numIntensityIntervals); + for (uint32_t interval = 0; interval<numIntensityIntervals; interval++) + { + SEIFilmGrainCharacteristics::CompModelIntensityValues &cmiv = cm.intensityValues[interval]; + sei_read_code(pDecodedMessageOutputStream, 8, code, "intensity_interval_lower_bound[c][i]"); cmiv.intensityIntervalLowerBound = code; + sei_read_code(pDecodedMessageOutputStream, 8, code, "intensity_interval_upper_bound[c][i]"); cmiv.intensityIntervalUpperBound = code; + cmiv.compModelValue.resize(cm.numModelValues); + for (uint32_t j = 0; j<cm.numModelValues; j++) + { + sei_read_svlc(pDecodedMessageOutputStream, cmiv.compModelValue[j], "comp_model_value[c][i]"); + } + } + } + } // for c + sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_characteristics_persistence_flag"); sei.m_filmGrainCharacteristicsPersistenceFlag = code != 0; + } // cancel flag } -void SEIReader::xParseSEIRegionRefreshInfo(SEIGradualDecodingRefreshInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIContentLightLevelInfo(SEIContentLightLevelInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t val; + uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_flag( pDecodedMessageOutputStream, val, "refreshed_region_flag" ); sei.m_gdrForegroundFlag = val ? 
1 : 0; + + sei_read_code(pDecodedMessageOutputStream, 16, code, "max_content_light_level"); sei.m_maxContentLightLevel = code; + sei_read_code(pDecodedMessageOutputStream, 16, code, "max_pic_average_light_level"); sei.m_maxPicAverageLightLevel = code; } -void SEIReader::xParseSEINoDisplay(SEINoDisplay& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { + uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei.m_noDisplay = true; + + sei_read_code(pDecodedMessageOutputStream, 32, code, "ambient_illuminance"); sei.m_ambientIlluminance = code; + sei_read_code(pDecodedMessageOutputStream, 16, code, "ambient_light_x"); sei.m_ambientLightX = (uint16_t)code; + sei_read_code(pDecodedMessageOutputStream, 16, code, "ambient_light_y"); sei.m_ambientLightY = (uint16_t)code; } -void SEIReader::xParseSEIToneMappingInfo(SEIToneMappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIContentColourVolume(SEIContentColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { int i; uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_uvlc( pDecodedMessageOutputStream, val, "tone_map_id" ); sei.m_toneMapId = val; - sei_read_flag( pDecodedMessageOutputStream, val, "tone_map_cancel_flag" ); sei.m_toneMapCancelFlag = val; - if ( !sei.m_toneMapCancelFlag ) + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_cancel_flag"); sei.m_ccvCancelFlag = val; + if (!sei.m_ccvCancelFlag) { - sei_read_flag( pDecodedMessageOutputStream, val, "tone_map_persistence_flag" ); sei.m_toneMapPersistenceFlag = val; - sei_read_code( pDecodedMessageOutputStream, 8, val, "coded_data_bit_depth" ); sei.m_codedDataBitDepth = val; - sei_read_code( pDecodedMessageOutputStream, 8, val, 
"target_bit_depth" ); sei.m_targetBitDepth = val; - sei_read_uvlc( pDecodedMessageOutputStream, val, "tone_map_model_id" ); sei.m_modelId = val; - switch(sei.m_modelId) + int iVal; + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_persistence_flag"); sei.m_ccvPersistenceFlag = val; + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_primaries_present_flag"); sei.m_ccvPrimariesPresentFlag = val; + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_min_luminance_value_present_flag"); sei.m_ccvMinLuminanceValuePresentFlag = val; + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_max_luminance_value_present_flag"); sei.m_ccvMaxLuminanceValuePresentFlag = val; + sei_read_flag(pDecodedMessageOutputStream, val, "ccv_avg_luminance_value_present_flag"); sei.m_ccvAvgLuminanceValuePresentFlag = val; + + if (sei.m_ccvPrimariesPresentFlag) { - case 0: - { - sei_read_code( pDecodedMessageOutputStream, 32, val, "min_value" ); sei.m_minValue = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "max_value" ); sei.m_maxValue = val; - break; - } - case 1: - { - sei_read_code( pDecodedMessageOutputStream, 32, val, "sigmoid_midpoint" ); sei.m_sigmoidMidpoint = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "sigmoid_width" ); sei.m_sigmoidWidth = val; - break; - } - case 2: - { - uint32_t num = 1u << sei.m_targetBitDepth; - sei.m_startOfCodedInterval.resize(num+1); - for(i = 0; i < num; i++) - { - sei_read_code( pDecodedMessageOutputStream, ((( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3), val, "start_of_coded_interval[i]" ); - sei.m_startOfCodedInterval[i] = val; - } - sei.m_startOfCodedInterval[num] = 1u << sei.m_codedDataBitDepth; - break; - } - case 3: + for (i = 0; i < MAX_NUM_COMPONENT; i++) { - sei_read_code( pDecodedMessageOutputStream, 16, val, "num_pivots" ); sei.m_numPivots = val; - sei.m_codedPivotValue.resize(sei.m_numPivots); - sei.m_targetPivotValue.resize(sei.m_numPivots); - for(i = 0; i < sei.m_numPivots; i++ ) - { - sei_read_code( 
pDecodedMessageOutputStream, ((( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3), val, "coded_pivot_value[i]" ); - sei.m_codedPivotValue[i] = val; - sei_read_code( pDecodedMessageOutputStream, ((( sei.m_targetBitDepth + 7 ) >> 3 ) << 3), val, "target_pivot_value[i]" ); - sei.m_targetPivotValue[i] = val; - } - break; + sei_read_scode(pDecodedMessageOutputStream, 32, iVal, "ccv_primaries_x[i]"); sei.m_ccvPrimariesX[i] = iVal; + sei_read_scode(pDecodedMessageOutputStream, 32, iVal, "ccv_primaries_y[i]"); sei.m_ccvPrimariesY[i] = iVal; } - case 4: - { - sei_read_code( pDecodedMessageOutputStream, 8, val, "camera_iso_speed_idc" ); sei.m_cameraIsoSpeedIdc = val; - if( sei.m_cameraIsoSpeedIdc == 255) //Extended_ISO - { - sei_read_code( pDecodedMessageOutputStream, 32, val, "camera_iso_speed_value" ); sei.m_cameraIsoSpeedValue = val; - } - sei_read_code( pDecodedMessageOutputStream, 8, val, "exposure_index_idc" ); sei.m_exposureIndexIdc = val; - if( sei.m_exposureIndexIdc == 255) //Extended_ISO - { - sei_read_code( pDecodedMessageOutputStream, 32, val, "exposure_index_value" ); sei.m_exposureIndexValue = val; - } - sei_read_flag( pDecodedMessageOutputStream, val, "exposure_compensation_value_sign_flag" ); sei.m_exposureCompensationValueSignFlag = val; - sei_read_code( pDecodedMessageOutputStream, 16, val, "exposure_compensation_value_numerator" ); sei.m_exposureCompensationValueNumerator = val; - sei_read_code( pDecodedMessageOutputStream, 16, val, "exposure_compensation_value_denom_idc" ); sei.m_exposureCompensationValueDenomIdc = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "ref_screen_luminance_white" ); sei.m_refScreenLuminanceWhite = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "extended_range_white_level" ); sei.m_extendedRangeWhiteLevel = val; - sei_read_code( pDecodedMessageOutputStream, 16, val, "nominal_black_level_code_value" ); sei.m_nominalBlackLevelLumaCodeValue = val; - sei_read_code( pDecodedMessageOutputStream, 16, val, 
"nominal_white_level_code_value" ); sei.m_nominalWhiteLevelLumaCodeValue= val; - sei_read_code( pDecodedMessageOutputStream, 16, val, "extended_white_level_code_value" ); sei.m_extendedWhiteLevelLumaCodeValue = val; - break; - } - default: - { - THROW("Undefined SEIToneMapModelId"); - break; - } - }//switch model id - }// if(!sei.m_toneMapCancelFlag) -} - -void SEIReader::xParseSEISOPDescription(SEISOPDescription &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) -{ - int iCode; - uint32_t uiCode; - output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "sop_seq_parameter_set_id" ); sei.m_sopSeqParameterSetId = uiCode; - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "num_pics_in_sop_minus1" ); sei.m_numPicsInSopMinus1 = uiCode; - for (uint32_t i = 0; i <= sei.m_numPicsInSopMinus1; i++) - { - sei_read_code( pDecodedMessageOutputStream, 6, uiCode, "sop_vcl_nut[i]" ); sei.m_sopDescVclNaluType[i] = uiCode; - sei_read_code( pDecodedMessageOutputStream, 3, sei.m_sopDescTemporalId[i], "sop_temporal_id[i]" ); sei.m_sopDescTemporalId[i] = uiCode; - if (sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_W_RADL && sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_N_LP) + } + if (sei.m_ccvMinLuminanceValuePresentFlag) + { + sei_read_code(pDecodedMessageOutputStream, 32, val, "ccv_min_luminance_value"); sei.m_ccvMinLuminanceValue = val; + } + if (sei.m_ccvMaxLuminanceValuePresentFlag) { - sei_read_uvlc( pDecodedMessageOutputStream, sei.m_sopDescStRpsIdx[i], "sop_short_term_rps_idx[i]" ); sei.m_sopDescStRpsIdx[i] = uiCode; + sei_read_code(pDecodedMessageOutputStream, 32, val, "ccv_max_luminance_value"); sei.m_ccvMaxLuminanceValue = val; } - if (i > 0) + if (sei.m_ccvAvgLuminanceValuePresentFlag) { - sei_read_svlc( pDecodedMessageOutputStream, iCode, "sop_poc_delta[i]" ); sei.m_sopDescPocDelta[i] = iCode; + sei_read_code(pDecodedMessageOutputStream, 32, val, 
"ccv_avg_luminance_value"); sei.m_ccvAvgLuminanceValue = val; } } } - -void SEIReader::xParseSEIScalableNesting(SEIScalableNesting& sei, const NalUnitType nalUnitType, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIEquirectangularProjection(SEIEquirectangularProjection& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t uiCode; - SEIMessages seis; + uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_flag( pDecodedMessageOutputStream, uiCode, "bitstream_subset_flag" ); sei.m_bitStreamSubsetFlag = uiCode; - sei_read_flag( pDecodedMessageOutputStream, uiCode, "nesting_op_flag" ); sei.m_nestingOpFlag = uiCode; - if (sei.m_nestingOpFlag) - { - sei_read_flag( pDecodedMessageOutputStream, uiCode, "default_op_flag" ); sei.m_defaultOpFlag = uiCode; - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "nesting_num_ops_minus1" ); sei.m_nestingNumOpsMinus1 = uiCode; - for (uint32_t i = sei.m_defaultOpFlag; i <= sei.m_nestingNumOpsMinus1; i++) - { - sei_read_code( pDecodedMessageOutputStream, 3, uiCode, "nesting_max_temporal_id_plus1[i]" ); sei.m_nestingMaxTemporalIdPlus1[i] = uiCode; - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "nesting_op_idx[i]" ); sei.m_nestingOpIdx[i] = uiCode; - } - } - else + sei_read_flag( pDecodedMessageOutputStream, val, "erp_cancel_flag" ); sei.m_erpCancelFlag = val; + if( !sei.m_erpCancelFlag ) { - sei_read_flag( pDecodedMessageOutputStream, uiCode, "all_layers_flag" ); sei.m_allLayersFlag = uiCode; - if (!sei.m_allLayersFlag) + sei_read_flag( pDecodedMessageOutputStream, val, "erp_persistence_flag" ); sei.m_erpPersistenceFlag = val; + sei_read_flag( pDecodedMessageOutputStream, val, "erp_guard_band_flag" ); sei.m_erpGuardBandFlag = val; + sei_read_code( pDecodedMessageOutputStream, 2, val, "erp_reserved_zero_2bits" ); + if ( sei.m_erpGuardBandFlag == 1) { - sei_read_code( pDecodedMessageOutputStream, 
3, uiCode, "nesting_no_op_max_temporal_id_plus1" ); sei.m_nestingNoOpMaxTemporalIdPlus1 = uiCode; - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "nesting_num_layers_minus1" ); sei.m_nestingNumLayersMinus1 = uiCode; - for (uint32_t i = 0; i <= sei.m_nestingNumLayersMinus1; i++) - { - sei_read_code( pDecodedMessageOutputStream, 6, uiCode, "nesting_layer_id[i]" ); sei.m_nestingLayerId[i] = uiCode; - } + sei_read_code( pDecodedMessageOutputStream, 3, val, "erp_guard_band_type" ); sei.m_erpGuardBandType = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "erp_left_guard_band_width" ); sei.m_erpLeftGuardBandWidth = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "erp_right_guard_band_width"); sei.m_erpRightGuardBandWidth = val; } } +} - // byte alignment - while ( m_pcBitstream->getNumBitsRead() % 8 != 0 ) - { - uint32_t code; - sei_read_flag( pDecodedMessageOutputStream, code, "nesting_zero_bit" ); - } - - // read nested SEI messages - do - { - xReadSEImessage(sei.m_nestedSEIs, nalUnitType, sps, pDecodedMessageOutputStream); - } while (m_pcBitstream->getNumBitsLeft() > 8); - - if (pDecodedMessageOutputStream) +void SEIReader::xParseSEISphereRotation(SEISphereRotation& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +{ + uint32_t val; + int sval; + output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); + sei_read_flag( pDecodedMessageOutputStream, val, "sphere_rotation_cancel_flag" ); sei.m_sphereRotationCancelFlag = val; + if( !sei.m_sphereRotationCancelFlag ) { - (*pDecodedMessageOutputStream) << "End of scalable nesting SEI message\n"; + sei_read_flag ( pDecodedMessageOutputStream, val, "sphere_rotation_persistence_flag" ); sei.m_sphereRotationPersistenceFlag = val; + sei_read_code ( pDecodedMessageOutputStream, 6, val, "sphere_rotation_reserved_zero_6bits" ); + sei_read_scode( pDecodedMessageOutputStream, 32, sval, "sphere_rotation_yaw" ); sei.m_sphereRotationYaw = sval; + sei_read_scode( 
pDecodedMessageOutputStream, 32, sval, "sphere_rotation_pitch" ); sei.m_sphereRotationPitch = sval; + sei_read_scode( pDecodedMessageOutputStream, 32, sval, "sphere_rotation_roll" ); sei.m_sphereRotationRoll = sval; } } -#if HEVC_TILES_WPP -void SEIReader::xParseSEITempMotionConstraintsTileSets(SEITempMotionConstrainedTileSets& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIOmniViewport(SEIOmniViewport& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { uint32_t code; + int scode; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_flag( pDecodedMessageOutputStream, code, "mc_all_tiles_exact_sample_value_match_flag"); sei.m_mc_all_tiles_exact_sample_value_match_flag = (code != 0); - sei_read_flag( pDecodedMessageOutputStream, code, "each_tile_one_tile_set_flag"); sei.m_each_tile_one_tile_set_flag = (code != 0); - - if(!sei.m_each_tile_one_tile_set_flag) + sei_read_code( pDecodedMessageOutputStream, 10, code, "omni_viewport_id" ); sei.m_omniViewportId = code; + sei_read_flag( pDecodedMessageOutputStream, code, "omni_viewport_cancel_flag" ); sei.m_omniViewportCancelFlag = code; + if (!sei.m_omniViewportCancelFlag) { - sei_read_flag( pDecodedMessageOutputStream, code, "limited_tile_set_display_flag"); sei.m_limited_tile_set_display_flag = (code != 0); - sei_read_uvlc( pDecodedMessageOutputStream, code, "num_sets_in_message_minus1"); sei.setNumberOfTileSets(code + 1); - - if(sei.getNumberOfTileSets() != 0) + uint32_t numRegions; + sei_read_flag( pDecodedMessageOutputStream, code, "omni_viewport_persistence_flag" ); sei.m_omniViewportPersistenceFlag = code; + sei_read_code( pDecodedMessageOutputStream, 4, numRegions, "omni_viewport_cnt_minus1" ); numRegions++; + sei.m_omniViewportRegions.resize(numRegions); + for(uint32_t region=0; region<numRegions; region++) { - for(int i = 0; i < sei.getNumberOfTileSets(); i++) - { - sei_read_uvlc( pDecodedMessageOutputStream, code, 
"mcts_id"); sei.tileSetData(i).m_mcts_id = code; - - if(sei.m_limited_tile_set_display_flag) - { - sei_read_flag( pDecodedMessageOutputStream, code, "display_tile_set_flag"); sei.tileSetData(i).m_display_tile_set_flag = (code != 1); - } - - sei_read_uvlc( pDecodedMessageOutputStream, code, "num_tile_rects_in_set_minus1"); sei.tileSetData(i).setNumberOfTileRects(code + 1); - - for(int j=0; j<sei.tileSetData(i).getNumberOfTileRects(); j++) - { - sei_read_uvlc( pDecodedMessageOutputStream, code, "top_left_tile_index"); sei.tileSetData(i).topLeftTileIndex(j) = code; - sei_read_uvlc( pDecodedMessageOutputStream, code, "bottom_right_tile_index"); sei.tileSetData(i).bottomRightTileIndex(j) = code; - } - - if(!sei.m_mc_all_tiles_exact_sample_value_match_flag) - { - sei_read_flag( pDecodedMessageOutputStream, code, "exact_sample_value_match_flag"); sei.tileSetData(i).m_exact_sample_value_match_flag = (code != 0); - } - sei_read_flag( pDecodedMessageOutputStream, code, "mcts_tier_level_idc_present_flag"); sei.tileSetData(i).m_mcts_tier_level_idc_present_flag = (code != 0); - - if(sei.tileSetData(i).m_mcts_tier_level_idc_present_flag) - { - sei_read_flag( pDecodedMessageOutputStream, code, "mcts_tier_flag"); sei.tileSetData(i).m_mcts_tier_flag = (code != 0); - sei_read_code( pDecodedMessageOutputStream, 8, code, "mcts_level_idc"); sei.tileSetData(i).m_mcts_level_idc = code; - } - } - } + SEIOmniViewport::OmniViewport &viewport = sei.m_omniViewportRegions[region]; + sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_azimuth_centre" ); viewport.azimuthCentre = scode; + sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_elevation_centre" ); viewport.elevationCentre = scode; + sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_tilt_centre" ); viewport.tiltCentre = code; + sei_read_code( pDecodedMessageOutputStream, 32, code, "omni_viewport_hor_range" ); viewport.horRange = code; + sei_read_code( 
pDecodedMessageOutputStream, 32, code, "omni_viewport_ver_range" ); viewport.verRange = code; + } } else { - sei_read_flag( pDecodedMessageOutputStream, code, "max_mcs_tier_level_idc_present_flag"); sei.m_max_mcs_tier_level_idc_present_flag = code; - if(sei.m_max_mcs_tier_level_idc_present_flag) - { - sei_read_flag( pDecodedMessageOutputStream, code, "max_mcts_tier_flag"); sei.m_max_mcts_tier_flag = code; - sei_read_code( pDecodedMessageOutputStream, 8, code, "max_mcts_level_idc"); sei.m_max_mcts_level_idc = code; - } + sei.m_omniViewportRegions.clear(); + sei.m_omniViewportPersistenceFlag=false; } } -#endif -void SEIReader::xParseSEITimeCode(SEITimeCode& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIRegionWisePacking(SEIRegionWisePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_code( pDecodedMessageOutputStream, 2, code, "num_clock_ts"); sei.numClockTs = code; - for(int i = 0; i < sei.numClockTs; i++) + uint32_t val; + + sei_read_flag( pDecodedMessageOutputStream, val, "rwp_cancel_flag" ); sei.m_rwpCancelFlag = val; + if (!sei.m_rwpCancelFlag) { - SEITimeSet currentTimeSet; - sei_read_flag( pDecodedMessageOutputStream, code, "clock_time_stamp_flag[i]"); currentTimeSet.clockTimeStampFlag = code; - if(currentTimeSet.clockTimeStampFlag) + sei_read_flag( pDecodedMessageOutputStream, val, "rwp_persistence_flag" ); sei.m_rwpPersistenceFlag = val; + sei_read_flag( pDecodedMessageOutputStream, val, "constituent_picture_matching_flag" ); sei.m_constituentPictureMatchingFlag = val; + sei_read_code( pDecodedMessageOutputStream, 5, val, "rwp_reserved_zero_5bits" ); + sei_read_code( pDecodedMessageOutputStream, 8, val, "num_packed_regions" ); sei.m_numPackedRegions = val; + sei_read_code( pDecodedMessageOutputStream, 32, val, "proj_picture_width" ); sei.m_projPictureWidth = val; + sei_read_code( 
pDecodedMessageOutputStream, 32, val, "proj_picture_height" ); sei.m_projPictureHeight = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_picture_width" ); sei.m_packedPictureWidth = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_picture_height" ); sei.m_packedPictureHeight = val; + + sei.m_rwpTransformType.resize(sei.m_numPackedRegions); + sei.m_rwpGuardBandFlag.resize(sei.m_numPackedRegions); + sei.m_projRegionWidth.resize(sei.m_numPackedRegions); + sei.m_projRegionHeight.resize(sei.m_numPackedRegions); + sei.m_rwpProjRegionTop.resize(sei.m_numPackedRegions); + sei.m_projRegionLeft.resize(sei.m_numPackedRegions); + sei.m_packedRegionWidth.resize(sei.m_numPackedRegions); + sei.m_packedRegionHeight.resize(sei.m_numPackedRegions); + sei.m_packedRegionTop.resize(sei.m_numPackedRegions); + sei.m_packedRegionLeft.resize(sei.m_numPackedRegions); + sei.m_rwpLeftGuardBandWidth.resize(sei.m_numPackedRegions); + sei.m_rwpRightGuardBandWidth.resize(sei.m_numPackedRegions); + sei.m_rwpTopGuardBandHeight.resize(sei.m_numPackedRegions); + sei.m_rwpBottomGuardBandHeight.resize(sei.m_numPackedRegions); + sei.m_rwpGuardBandNotUsedForPredFlag.resize(sei.m_numPackedRegions); + sei.m_rwpGuardBandType.resize(4*sei.m_numPackedRegions); + + for( int i=0; i < sei.m_numPackedRegions; i++ ) { - sei_read_flag( pDecodedMessageOutputStream, code, "nuit_field_based_flag"); currentTimeSet.numUnitFieldBasedFlag = code; - sei_read_code( pDecodedMessageOutputStream, 5, code, "counting_type"); currentTimeSet.countingType = code; - sei_read_flag( pDecodedMessageOutputStream, code, "full_timestamp_flag"); currentTimeSet.fullTimeStampFlag = code; - sei_read_flag( pDecodedMessageOutputStream, code, "discontinuity_flag"); currentTimeSet.discontinuityFlag = code; - sei_read_flag( pDecodedMessageOutputStream, code, "cnt_dropped_flag"); currentTimeSet.cntDroppedFlag = code; - sei_read_code( pDecodedMessageOutputStream, 9, code, "n_frames"); 
currentTimeSet.numberOfFrames = code; - if(currentTimeSet.fullTimeStampFlag) - { - sei_read_code( pDecodedMessageOutputStream, 6, code, "seconds_value"); currentTimeSet.secondsValue = code; - sei_read_code( pDecodedMessageOutputStream, 6, code, "minutes_value"); currentTimeSet.minutesValue = code; - sei_read_code( pDecodedMessageOutputStream, 5, code, "hours_value"); currentTimeSet.hoursValue = code; - } - else - { - sei_read_flag( pDecodedMessageOutputStream, code, "seconds_flag"); currentTimeSet.secondsFlag = code; - if(currentTimeSet.secondsFlag) - { - sei_read_code( pDecodedMessageOutputStream, 6, code, "seconds_value"); currentTimeSet.secondsValue = code; - sei_read_flag( pDecodedMessageOutputStream, code, "minutes_flag"); currentTimeSet.minutesFlag = code; - if(currentTimeSet.minutesFlag) - { - sei_read_code( pDecodedMessageOutputStream, 6, code, "minutes_value"); currentTimeSet.minutesValue = code; - sei_read_flag( pDecodedMessageOutputStream, code, "hours_flag"); currentTimeSet.hoursFlag = code; - if(currentTimeSet.hoursFlag) - { - sei_read_code( pDecodedMessageOutputStream, 5, code, "hours_value"); currentTimeSet.hoursValue = code; - } - } - } - } - sei_read_code( pDecodedMessageOutputStream, 5, code, "time_offset_length"); currentTimeSet.timeOffsetLength = code; - if(currentTimeSet.timeOffsetLength > 0) + sei_read_code( pDecodedMessageOutputStream, 4, val, "rwp_reserved_zero_4bits" ); + sei_read_code( pDecodedMessageOutputStream, 3, val, "rwp_tTransform_type" ); sei.m_rwpTransformType[i] = val; + sei_read_flag( pDecodedMessageOutputStream, val, "rwp_guard_band_flag" ); sei.m_rwpGuardBandFlag[i] = val; + sei_read_code( pDecodedMessageOutputStream, 32, val, "proj_region_width" ); sei.m_projRegionWidth[i] = val; + sei_read_code( pDecodedMessageOutputStream, 32, val, "proj_region_height" ); sei.m_projRegionHeight[i] = val; + sei_read_code( pDecodedMessageOutputStream, 32, val, "rwp_proj_regionTop" ); sei.m_rwpProjRegionTop[i] = val; + sei_read_code( 
pDecodedMessageOutputStream, 32, val, "proj_region_left" ); sei.m_projRegionLeft[i] = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_region_width" ); sei.m_packedRegionWidth[i] = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_region_height" ); sei.m_packedRegionHeight[i] = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_region_top" ); sei.m_packedRegionTop[i] = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "packed_region_left" ); sei.m_packedRegionLeft[i] = val; + if( sei.m_rwpGuardBandFlag[i] ) { - sei_read_code( pDecodedMessageOutputStream, currentTimeSet.timeOffsetLength, code, "time_offset_value"); - if((code & (1 << (currentTimeSet.timeOffsetLength-1))) == 0) + sei_read_code( pDecodedMessageOutputStream, 8, val, "rwp_left_guard_band_width" ); sei.m_rwpLeftGuardBandWidth[i] = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "rwp_right_guard_band_width" ); sei.m_rwpRightGuardBandWidth[i] = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "rwp_top_guard_band_height" ); sei.m_rwpTopGuardBandHeight[i] = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "rwp_bottom_guard_band_height" ); sei. 
m_rwpBottomGuardBandHeight[i] = val; + sei_read_flag( pDecodedMessageOutputStream, val, "rwp_guard_band_not_used_forPred_flag" ); sei.m_rwpGuardBandNotUsedForPredFlag[i] = val; + for( int j=0; j < 4; j++ ) { - currentTimeSet.timeOffsetValue = code; - } - else - { - code &= (1<< (currentTimeSet.timeOffsetLength-1)) - 1; - currentTimeSet.timeOffsetValue = ~code + 1; + sei_read_code( pDecodedMessageOutputStream, 3, val, "rwp_guard_band_type" ); sei.m_rwpGuardBandType[i*4 + j] = val; } + sei_read_code( pDecodedMessageOutputStream, 3, val, "rwp_guard_band_reserved_zero_3bits" ); } } - sei.timeSetArray[i] = currentTimeSet; } } -void SEIReader::xParseSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEIGeneralizedCubemapProjection(SEIGeneralizedCubemapProjection& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t uiCode; + uint32_t val; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - sei_read_code( pDecodedMessageOutputStream, 8, uiCode, "ver_chroma_filter_idc"); sei.m_verChromaFilterIdc = uiCode; - sei_read_code( pDecodedMessageOutputStream, 8, uiCode, "hor_chroma_filter_idc"); sei.m_horChromaFilterIdc = uiCode; - sei_read_flag( pDecodedMessageOutputStream, uiCode, "ver_filtering_field_processing_flag"); sei.m_verFilteringFieldProcessingFlag = uiCode; - if(sei.m_verChromaFilterIdc == 1 || sei.m_horChromaFilterIdc == 1) + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_cancel_flag" ); sei.m_gcmpCancelFlag = val; + if (!sei.m_gcmpCancelFlag) { - sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "target_format_idc"); sei.m_targetFormatIdc = uiCode; - if(sei.m_verChromaFilterIdc == 1) + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_persistence_flag" ); sei.m_gcmpPersistenceFlag = val; + sei_read_code( pDecodedMessageOutputStream, 3, val, "gcmp_packing_type" ); sei.m_gcmpPackingType = val; + 
sei_read_code( pDecodedMessageOutputStream, 2, val, "gcmp_mapping_function_type" ); sei.m_gcmpMappingFunctionType = val; + + int numFace = sei.m_gcmpPackingType == 4 || sei.m_gcmpPackingType == 5 ? 5 : 6; + sei.m_gcmpFaceIndex.resize(numFace); + sei.m_gcmpFaceRotation.resize(numFace); + if (sei.m_gcmpMappingFunctionType == 2) { - uint32_t numVerticalFilters; - sei_read_uvlc( pDecodedMessageOutputStream, numVerticalFilters, "num_vertical_filters"); sei.m_verFilterCoeff.resize(numVerticalFilters); - if(numVerticalFilters > 0) - { - for(int i = 0; i < numVerticalFilters; i++) - { - uint32_t verTapLengthMinus1; - sei_read_uvlc( pDecodedMessageOutputStream, verTapLengthMinus1, "ver_tap_length_minus_1"); sei.m_verFilterCoeff[i].resize(verTapLengthMinus1+1); - for(int j = 0; j < (verTapLengthMinus1 + 1); j++) - { - sei_read_svlc( pDecodedMessageOutputStream, sei.m_verFilterCoeff[i][j], "ver_filter_coeff"); - } - } - } + sei.m_gcmpFunctionCoeffU.resize(numFace); + sei.m_gcmpFunctionUAffectedByVFlag.resize(numFace); + sei.m_gcmpFunctionCoeffV.resize(numFace); + sei.m_gcmpFunctionVAffectedByUFlag.resize(numFace); } - if(sei.m_horChromaFilterIdc == 1) + + for (int i = 0; i < numFace; i++) { - uint32_t numHorizontalFilters; - sei_read_uvlc( pDecodedMessageOutputStream, numHorizontalFilters, "num_horizontal_filters"); sei.m_horFilterCoeff.resize(numHorizontalFilters); - if(numHorizontalFilters > 0) + sei_read_code( pDecodedMessageOutputStream, 3, val, "gcmp_face_index" ); sei.m_gcmpFaceIndex[i] = val; + sei_read_code( pDecodedMessageOutputStream, 2, val, "gcmp_face_rotation" ); sei.m_gcmpFaceRotation[i] = val; + if (sei.m_gcmpMappingFunctionType == 2) { - for(int i = 0; i < numHorizontalFilters; i++) - { - uint32_t horTapLengthMinus1; - sei_read_uvlc( pDecodedMessageOutputStream, horTapLengthMinus1, "hor_tap_length_minus_1"); sei.m_horFilterCoeff[i].resize(horTapLengthMinus1+1); - for(int j = 0; j < (horTapLengthMinus1 + 1); j++) - { - sei_read_svlc( 
pDecodedMessageOutputStream, sei.m_horFilterCoeff[i][j], "hor_filter_coeff"); - } - } + sei_read_code( pDecodedMessageOutputStream, 7, val, "gcmp_function_coeff_u" ); sei.m_gcmpFunctionCoeffU[i] = val; + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_function_u_affected_by_v_flag" ); sei.m_gcmpFunctionUAffectedByVFlag[i] = val; + sei_read_code( pDecodedMessageOutputStream, 7, val, "gcmp_function_coeff_v" ); sei.m_gcmpFunctionCoeffV[i] = val; + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_function_v_affected_by_u_flag" ); sei.m_gcmpFunctionVAffectedByUFlag[i] = val; } } - } -} - -void SEIReader::xParseSEIKneeFunctionInfo(SEIKneeFunctionInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) -{ - int i; - uint32_t val; - output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - - sei_read_uvlc( pDecodedMessageOutputStream, val, "knee_function_id" ); sei.m_kneeId = val; - sei_read_flag( pDecodedMessageOutputStream, val, "knee_function_cancel_flag" ); sei.m_kneeCancelFlag = val; - if ( !sei.m_kneeCancelFlag ) - { - sei_read_flag( pDecodedMessageOutputStream, val, "knee_function_persistence_flag" ); sei.m_kneePersistenceFlag = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "input_d_range" ); sei.m_kneeInputDrange = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "input_disp_luminance" ); sei.m_kneeInputDispLuminance = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "output_d_range" ); sei.m_kneeOutputDrange = val; - sei_read_code( pDecodedMessageOutputStream, 32, val, "output_disp_luminance" ); sei.m_kneeOutputDispLuminance = val; - sei_read_uvlc( pDecodedMessageOutputStream, val, "num_knee_points_minus1" ); sei.m_kneeNumKneePointsMinus1 = val; - CHECK( sei.m_kneeNumKneePointsMinus1 <= 0, "Invali state" ); - sei.m_kneeInputKneePoint.resize(sei.m_kneeNumKneePointsMinus1+1); - sei.m_kneeOutputKneePoint.resize(sei.m_kneeNumKneePointsMinus1+1); - for(i = 0; i <= 
sei.m_kneeNumKneePointsMinus1; i++ ) + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_guard_band_flag" ); sei.m_gcmpGuardBandFlag = val; + if (sei.m_gcmpGuardBandFlag) { - sei_read_code( pDecodedMessageOutputStream, 10, val, "input_knee_point" ); sei.m_kneeInputKneePoint[i] = val; - sei_read_code( pDecodedMessageOutputStream, 10, val, "output_knee_point" ); sei.m_kneeOutputKneePoint[i] = val; + sei_read_flag( pDecodedMessageOutputStream, val, "gcmp_guard_band_boundary_type" ); sei.m_gcmpGuardBandBoundaryType = val; + sei_read_code( pDecodedMessageOutputStream, 4, val, "gcmp_guard_band_samples_minus1" ); sei.m_gcmpGuardBandSamplesMinus1 = val; } } } -void SEIReader::xParseSEIColourRemappingInfo(SEIColourRemappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) +void SEIReader::xParseSEISubpictureLevelInfo(SEISubpicureLevelInfo& sei, const SPS *sps, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t uiVal; - int iVal; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - - sei_read_uvlc( pDecodedMessageOutputStream, uiVal, "colour_remap_id" ); sei.m_colourRemapId = uiVal; - sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_cancel_flag" ); sei.m_colourRemapCancelFlag = uiVal; - if( !sei.m_colourRemapCancelFlag ) + if (sps == nullptr) { - sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_persistence_flag" ); sei.m_colourRemapPersistenceFlag = uiVal; - sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_video_signal_info_present_flag" ); sei.m_colourRemapVideoSignalInfoPresentFlag = uiVal; - if ( sei.m_colourRemapVideoSignalInfoPresentFlag ) - { - sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_full_range_flag" ); sei.m_colourRemapFullRangeFlag = uiVal; - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_primaries" ); sei.m_colourRemapPrimaries = uiVal; - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, 
"colour_remap_transfer_function" ); sei.m_colourRemapTransferFunction = uiVal; - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_matrix_coefficients" ); sei.m_colourRemapMatrixCoefficients = uiVal; - } - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_input_bit_depth" ); sei.m_colourRemapInputBitDepth = uiVal; - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_bit_depth" ); sei.m_colourRemapBitDepth = uiVal; + fprintf (stderr, "no SPS available, not parsing Subpicture level information SEI"); + return; + } + uint32_t val; + sei_read_code( pDecodedMessageOutputStream, 4, val, "sli_seq_parameter_set_id" ); sei.m_sliSeqParameterSetId = val; + sei_read_code( pDecodedMessageOutputStream, 3, val, "num_ref_levels_minus1" ); sei.m_numRefLevels = val + 1; + sei_read_flag( pDecodedMessageOutputStream, val, "explicit_fraction_present_flag" ); sei.m_explicitFractionPresentFlag = val; + + sei.m_refLevelIdc.resize(sei.m_numRefLevels); + if (sei.m_explicitFractionPresentFlag) + { + sei.m_refLevelFraction.resize(sei.m_numRefLevels); + } - for( int c=0 ; c<3 ; c++ ) + for( int i = 0; i < sei.m_numRefLevels; i++ ) + { + sei_read_code( pDecodedMessageOutputStream, 8, val, "ref_level_idc[i]" ); sei.m_refLevelIdc[i] = (Level::Name) val; + if( sei.m_explicitFractionPresentFlag ) { - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "pre_lut_num_val_minus1[c]" ); sei.m_preLutNumValMinus1[c] = (uiVal==0) ? 
1 : uiVal; - sei.m_preLut[c].resize(sei.m_preLutNumValMinus1[c]+1); - if( uiVal> 0 ) - { - for ( int i=0 ; i<=sei.m_preLutNumValMinus1[c] ; i++ ) - { - sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapInputBitDepth + 7 ) >> 3 ) << 3, uiVal, "pre_lut_coded_value[c][i]" ); sei.m_preLut[c][i].codedValue = uiVal; - sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "pre_lut_target_value[c][i]" ); sei.m_preLut[c][i].targetValue = uiVal; - } - } - else // pre_lut_num_val_minus1[c] == 0 - { - sei.m_preLut[c][0].codedValue = 0; - sei.m_preLut[c][0].targetValue = 0; - sei.m_preLut[c][1].codedValue = (1 << sei.m_colourRemapInputBitDepth) - 1 ; - sei.m_preLut[c][1].targetValue = (1 << sei.m_colourRemapBitDepth) - 1 ; - } - } + int numSubPics = sps->getNumSubPics(); + sei.m_refLevelFraction[i].resize(numSubPics); - sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_matrix_present_flag" ); sei.m_colourRemapMatrixPresentFlag = uiVal; - if( sei.m_colourRemapMatrixPresentFlag ) - { - sei_read_code( pDecodedMessageOutputStream, 4, uiVal, "log2_matrix_denom" ); sei.m_log2MatrixDenom = uiVal; - for ( int c=0 ; c<3 ; c++ ) - { - for ( int i=0 ; i<3 ; i++ ) - { - sei_read_svlc( pDecodedMessageOutputStream, iVal, "colour_remap_coeffs[c][i]" ); sei.m_colourRemapCoeffs[c][i] = iVal; - } - } - } - else // setting default matrix (I3) - { - sei.m_log2MatrixDenom = 10; - for ( int c=0 ; c<3 ; c++ ) - { - for ( int i=0 ; i<3 ; i++ ) - { - sei.m_colourRemapCoeffs[c][i] = (c==i) << sei.m_log2MatrixDenom; - } - } - } - for( int c=0 ; c<3 ; c++ ) - { - sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "post_lut_num_val_minus1[c]" ); sei.m_postLutNumValMinus1[c] = (uiVal==0) ? 
1 : uiVal; - sei.m_postLut[c].resize(sei.m_postLutNumValMinus1[c]+1); - if( uiVal > 0 ) + for( int j = 0; j < numSubPics; j++ ) { - for ( int i=0 ; i<=sei.m_postLutNumValMinus1[c] ; i++ ) - { - sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "post_lut_coded_value[c][i]" ); sei.m_postLut[c][i].codedValue = uiVal; - sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "post_lut_target_value[c][i]" ); sei.m_postLut[c][i].targetValue = uiVal; - } - } - else - { - sei.m_postLut[c][0].codedValue = 0; - sei.m_postLut[c][0].targetValue = 0; - sei.m_postLut[c][1].targetValue = (1 << sei.m_colourRemapBitDepth) - 1; - sei.m_postLut[c][1].codedValue = (1 << sei.m_colourRemapBitDepth) - 1; + sei_read_code( pDecodedMessageOutputStream, 8, val, "ref_level_fraction_minus1[i][j]" ); sei.m_refLevelFraction[i][j]= val; } } } } -void SEIReader::xParseSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) -{ - uint32_t code; - output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[0]" ); sei.values.primaries[0][0] = code; - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[0]" ); sei.values.primaries[0][1] = code; - - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[1]" ); sei.values.primaries[1][0] = code; - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[1]" ); sei.values.primaries[1][1] = code; - - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[2]" ); sei.values.primaries[2][0] = code; - sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[2]" ); sei.values.primaries[2][1] = code; - - - sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_x" ); sei.values.whitePoint[0] = 
code; - sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_y" ); sei.values.whitePoint[1] = code; - - sei_read_code( pDecodedMessageOutputStream, 32, code, "max_display_mastering_luminance" ); sei.values.maxLuminance = code; - sei_read_code( pDecodedMessageOutputStream, 32, code, "min_display_mastering_luminance" ); sei.values.minLuminance = code; -} - -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI -void SEIReader::xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream) +void SEIReader::xParseSEISampleAspectRatioInfo(SEISampleAspectRatioInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) { - uint32_t code; output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); + uint32_t val; - sei_read_code(pDecodedMessageOutputStream, 8, code, "preferred_transfer_characteristics"); sei.m_preferredTransferCharacteristics = code; + sei_read_flag( pDecodedMessageOutputStream, val, "sari_cancel_flag" ); sei.m_sariCancelFlag = val; + if (!sei.m_sariCancelFlag) + { + sei_read_flag( pDecodedMessageOutputStream, val, "sari_persistence_flag" ); sei.m_sariPersistenceFlag = val; + sei_read_code( pDecodedMessageOutputStream, 8, val, "sari_aspect_ratio_idc" ); sei.m_sariAspectRatioIdc = val; + if (sei.m_sariAspectRatioIdc == 255) + { + sei_read_code( pDecodedMessageOutputStream, 16, val, "sari_sar_width" ); sei.m_sariSarWidth = val; + sei_read_code( pDecodedMessageOutputStream, 16, val, "sari_sar_height" ); sei.m_sariSarHeight = val; + } + } } -#endif -void SEIReader::xParseSEIGreenMetadataInfo(SEIGreenMetadataInfo& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream) -{ - uint32_t code; - output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); - - sei_read_code(pDecodedMessageOutputStream, 8, code, "green_metadata_type"); - sei.m_greenMetadataType = code; - sei_read_code(pDecodedMessageOutputStream, 
8, code, "xsd_metric_type"); - sei.m_xsdMetricType = code; - - sei_read_code(pDecodedMessageOutputStream, 16, code, "xsd_metric_value"); - sei.m_xsdMetricValue = code; -} //! \} diff --git a/source/Lib/DecoderLib/SEIread.h b/source/Lib/DecoderLib/SEIread.h index 72892a7e6cd285a9dbb5b1f029fd291da81790a8..50988f70cfb87855d24968517629bfbe2b155f5e 100644 --- a/source/Lib/DecoderLib/SEIread.h +++ b/source/Lib/DecoderLib/SEIread.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,39 +55,36 @@ class SEIReader: public VLCReader public: SEIReader() {}; virtual ~SEIReader() {}; - void parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream); + void parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream); protected: - void xReadSEImessage (SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream); + void xReadSEImessage (SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream); void xParseSEIuserDataUnregistered (SEIuserDataUnregistered &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIActiveParameterSets (SEIActiveParameterSets &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIDecodingUnitInfo (SEIDecodingUnitInfo& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream); + void xParseSEIDecodingUnitInfo (SEIDecodingUnitInfo& sei, uint32_t payloadSize, const 
SEIBufferingPeriod& bp, const uint32_t temporalId, std::ostream *pDecodedMessageOutputStream); void xParseSEIDecodedPictureHash (SEIDecodedPictureHash& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIBufferingPeriod (SEIBufferingPeriod& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream); - void xParseSEIPictureTiming (SEIPictureTiming& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream); - void xParseSEIRecoveryPoint (SEIRecoveryPoint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIBufferingPeriod (SEIBufferingPeriod& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIPictureTiming (SEIPictureTiming& sei, uint32_t payloadSize, const uint32_t temporalId, const SEIBufferingPeriod& bp, std::ostream *pDecodedMessageOutputStream); + void xParseSEIFrameFieldinfo (SEIFrameFieldInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIDependentRAPIndication (SEIDependentRAPIndication& sei, uint32_t payLoadSize, std::ostream *pDecodedMessageOutputStream); void xParseSEIFramePacking (SEIFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEISegmentedRectFramePacking (SEISegmentedRectFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIDisplayOrientation (SEIDisplayOrientation &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEITemporalLevel0Index (SEITemporalLevel0Index &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIRegionRefreshInfo (SEIGradualDecodingRefreshInfo &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEINoDisplay (SEINoDisplay &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIToneMappingInfo 
(SEIToneMappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEISOPDescription (SEISOPDescription &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIScalableNesting (SEIScalableNesting& sei, const NalUnitType nalUnitType, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream); -#if HEVC_TILES_WPP - void xParseSEITempMotionConstraintsTileSets (SEITempMotionConstrainedTileSets& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); -#endif - void xParseSEITimeCode (SEITimeCode& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIChromaResamplingFilterHint (SEIChromaResamplingFilterHint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIKneeFunctionInfo (SEIKneeFunctionInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); void xParseSEIMasteringDisplayColourVolume (SEIMasteringDisplayColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); - void xParseSEIColourRemappingInfo (SEIColourRemappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI void xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei, uint32_t payLoadSize, std::ostream *pDecodedMessageOutputStream); #endif - void xParseSEIGreenMetadataInfo (SEIGreenMetadataInfo& sei, uint32_t payLoadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIEquirectangularProjection (SEIEquirectangularProjection &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEISphereRotation (SEISphereRotation &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIOmniViewport (SEIOmniViewport& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIRegionWisePacking 
(SEIRegionWisePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIGeneralizedCubemapProjection (SEIGeneralizedCubemapProjection &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEISubpictureLevelInfo (SEISubpicureLevelInfo& sei, const SPS *sps, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEISampleAspectRatioInfo (SEISampleAspectRatioInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIUserDataRegistered (SEIUserDataRegistered& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIFilmGrainCharacteristics (SEIFilmGrainCharacteristics& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIContentLightLevelInfo (SEIContentLightLevelInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIAmbientViewingEnvironment (SEIAmbientViewingEnvironment& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void xParseSEIContentColourVolume (SEIContentColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream); + void sei_read_scode(std::ostream *pOS, uint32_t length, int& code, const char *pSymbolName); void sei_read_code(std::ostream *pOS, uint32_t uiLength, uint32_t& ruiCode, const char *pSymbolName); void sei_read_uvlc(std::ostream *pOS, uint32_t& ruiCode, const char *pSymbolName); void sei_read_svlc(std::ostream *pOS, int& ruiCode, const char *pSymbolName); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index 896ca3944445b54420348ac33381dbe80de66f1f..19bca72393d5cc00328ea8c9ff6faf64e2352dc0 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,7 +47,6 @@ #endif #include "CommonLib/AdaptiveLoopFilter.h" - #if ENABLE_TRACING void VLCReader::xReadCodeTr(uint32_t length, uint32_t& rValue, const char *pSymbolName) @@ -104,6 +103,31 @@ void xTraceFillerData () #endif +#if RExt__DECODER_DEBUG_BIT_STATISTICS || ENABLE_TRACING +void VLCReader::xReadSCode (uint32_t length, int& value, const char *pSymbolName) +#else +void VLCReader::xReadSCode (uint32_t length, int& value) +#endif +{ + uint32_t val; + assert ( length > 0 && length<=32); + m_pcBitstream->read (length, val); + value= length>=32 ? int(val) : ( (-int( val & (uint32_t(1)<<(length-1)))) | int(val) ); + +#if RExt__DECODER_DEBUG_BIT_STATISTICS + CodingStatistics::IncrementStatisticEP(pSymbolName, length, value); +#endif +#if ENABLE_TRACING + if (length < 10) + { + DTRACE( g_trace_ctx, D_HEADER, "%-50s i(%d) : %d\n", pSymbolName, length, value ); + } + else + { + DTRACE( g_trace_ctx, D_HEADER, "%-50s i(%d) : %d\n", pSymbolName, length, value ); + } +#endif +} // ==================================================================================================================== // Protected member functions @@ -114,7 +138,7 @@ void VLCReader::xReadCode (uint32_t uiLength, uint32_t& ruiCode, const char *pSy void VLCReader::xReadCode (uint32_t uiLength, uint32_t& ruiCode) #endif { - CHECK( uiLength == 0, "Reading a code of lenght '0'" ); + CHECK( uiLength == 0, "Reading a code of length '0'" ); m_pcBitstream->read (uiLength, ruiCode); #if RExt__DECODER_DEBUG_BIT_STATISTICS CodingStatistics::IncrementStatisticEP(pSymbolName, uiLength, ruiCode); @@ -270,106 +294,118 @@ HLSyntaxReader::~HLSyntaxReader() // Public member functions // ==================================================================================================================== -void 
HLSyntaxReader::parseShortTermRefPicSet( SPS* sps, ReferencePictureSet* rps, int idx ) +void HLSyntaxReader::copyRefPicList(SPS* sps, ReferencePictureList* source_rpl, ReferencePictureList* dest_rp) { - uint32_t code; - uint32_t interRPSPred; - if (idx > 0) + dest_rp->setNumberOfShorttermPictures(source_rpl->getNumberOfShorttermPictures()); + + dest_rp->setNumberOfInterLayerPictures( sps->getInterLayerPresentFlag() ? dest_rp->getNumberOfInterLayerPictures() : 0 ); + + if( sps->getLongTermRefsPresent() ) { - READ_FLAG(interRPSPred, "inter_ref_pic_set_prediction_flag"); rps->setInterRPSPrediction(interRPSPred); + dest_rp->setNumberOfLongtermPictures( dest_rp->getNumberOfLongtermPictures() ); } else + dest_rp->setNumberOfLongtermPictures(0); + + uint32_t numRefPic = dest_rp->getNumberOfShorttermPictures() + dest_rp->getNumberOfLongtermPictures(); + + for( int ii = 0; ii < numRefPic; ii++ ) + { + dest_rp->setRefPicIdentifier( ii, source_rpl->getRefPicIdentifier( ii ), source_rpl->isRefPicLongterm( ii ), source_rpl->isInterLayerRefPic( ii ), source_rpl->getInterLayerRefPicIdx( ii ) ); + } +} + +void HLSyntaxReader::parseRefPicList(SPS* sps, ReferencePictureList* rpl) +{ + uint32_t code; + READ_UVLC(code, "num_ref_entries[ listIdx ][ rplsIdx ]"); + uint32_t numRefPic = code; + uint32_t numStrp = 0; + uint32_t numLtrp = 0; + uint32_t numIlrp = 0; + + if (sps->getLongTermRefsPresent()) { - interRPSPred = false; - rps->setInterRPSPrediction(false); + READ_FLAG(code, "ltrp_in_slice_header_flag[ listIdx ][ rplsIdx ]"); + rpl->setLtrpInSliceHeaderFlag(code); } - if (interRPSPred) + bool isLongTerm; + int prevDelta = MAX_INT; + int deltaValue = 0; + bool firstSTRP = true; + + rpl->setInterLayerPresentFlag( sps->getInterLayerPresentFlag() ); + + for (int ii = 0; ii < numRefPic; ii++) { - uint32_t bit; - if(idx == sps->getRPSList()->getNumberOfReferencePictureSets()) + uint32_t isInterLayerRefPic = 0; + + if( rpl->getInterLayerPresentFlag() ) { - READ_UVLC(code, 
"delta_idx_minus1" ); // delta index of the Reference Picture Set used for prediction minus 1 + READ_FLAG( isInterLayerRefPic, "inter_layer_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" ); + + if( isInterLayerRefPic ) + { + READ_UVLC( code, "ilrp_idx[ listIdx ][ rplsIdx ][ i ]" ); + rpl->setRefPicIdentifier( ii, 0, true, true, code ); + numIlrp++; + } } - else + + if( !isInterLayerRefPic ) { - code = 0; + isLongTerm = false; + if (sps->getLongTermRefsPresent()) + { + READ_FLAG(code, "st_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]"); + isLongTerm = (code == 1) ? false : true; } - rps->setDeltaRIdxMinus1(code); // th we need that for proper transcoding - CHECK(code > idx-1, "Code exceeds boundary"); // delta_idx_minus1 shall not be larger than idx-1, otherwise we will predict from a negative row position that does not exist. When idx equals 0 there is no legal value and interRPSPred must be zero. See J0185-r2 - int rIdx = idx - 1 - code; - CHECK(rIdx > idx-1 || rIdx < 0, "Invalid index"); // Made assert tighter; if rIdx = idx then prediction is done from itself. rIdx must belong to range 0, idx-1, inclusive, see J0185-r2 - ReferencePictureSet* rpsRef = sps->getRPSList()->getReferencePictureSet(rIdx); - int k = 0, k0 = 0, k1 = 0; - READ_CODE(1, bit, "delta_rps_sign"); // delta_RPS_sign - READ_UVLC(code, "abs_delta_rps_minus1"); // absolute delta RPS minus 1 - int deltaRPS = (1 - 2 * bit) * (code + 1); // delta_RPS - - rps->setDeltaRPS( deltaRPS ); // th we need that for proper transcoding + else + isLongTerm = false; - for(int j=0 ; j <= rpsRef->getNumberOfPictures(); j++) + if (!isLongTerm) { - READ_CODE(1, bit, "used_by_curr_pic_flag" ); //first bit is "1" if Idc is 1 - int refIdc = bit; - if (refIdc == 0) + READ_UVLC(code, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]"); + if( !sps->getUseWP() && !sps->getUseWPBiPred() ) { - READ_CODE(1, bit, "use_delta_flag" ); //second bit is "1" if Idc is 2, "0" otherwise. 
- refIdc = bit<<1; //second bit is "1" if refIdc is 2, "0" if refIdc = 0. + code++; } - if (refIdc == 1 || refIdc == 2) + int readValue = code; + if (readValue > 0) + READ_FLAG(code, "strp_entry_sign_flag[ listIdx ][ rplsIdx ][ i ]"); + else + code = 1; + readValue = (code) ? readValue : 0 - readValue; //true means positive delta POC -- false otherwise + if (firstSTRP) { - int deltaPOC = deltaRPS + ((j < rpsRef->getNumberOfPictures())? rpsRef->getDeltaPOC(j) : 0); - rps->setDeltaPOC(k, deltaPOC); - rps->setUsed(k, (refIdc == 1)); - - if (deltaPOC < 0) - { - k0++; - } - else - { - k1++; - } - k++; + firstSTRP = false; + prevDelta = deltaValue = readValue; + } + else + { + deltaValue = prevDelta + readValue; + prevDelta = deltaValue; } - rps->setRefIdc(j,refIdc); + + rpl->setRefPicIdentifier( ii, deltaValue, isLongTerm, false, 0 ); + numStrp++; } - rps->setNumRefIdc(rpsRef->getNumberOfPictures()+1); - rps->setNumberOfPictures(k); - rps->setNumberOfNegativePictures(k0); - rps->setNumberOfPositivePictures(k1); - rps->sortDeltaPOC(); - } - else - { - READ_UVLC(code, "num_negative_pics"); rps->setNumberOfNegativePictures(code); - READ_UVLC(code, "num_positive_pics"); rps->setNumberOfPositivePictures(code); - int prev = 0; - int poc; - for(int j=0 ; j < rps->getNumberOfNegativePictures(); j++) + else { - READ_UVLC(code, "delta_poc_s0_minus1"); - poc = prev-code-1; - prev = poc; - rps->setDeltaPOC(j,poc); - READ_FLAG(code, "used_by_curr_pic_s0_flag"); rps->setUsed(j,code); + if (!rpl->getLtrpInSliceHeaderFlag()) + READ_CODE(sps->getBitsForPOC(), code, "poc_lsb_lt[listIdx][rplsIdx][j]"); + rpl->setRefPicIdentifier( ii, code, isLongTerm, false, 0 ); + numLtrp++; } - prev = 0; - for(int j=rps->getNumberOfNegativePictures(); j < rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures(); j++) - { - READ_UVLC(code, "delta_poc_s1_minus1"); - poc = prev+code+1; - prev = poc; - rps->setDeltaPOC(j,poc); - READ_FLAG(code, "used_by_curr_pic_s1_flag"); rps->setUsed(j,code); 
} - rps->setNumberOfPictures(rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures()); } - - rps->printDeltaPOC(); + rpl->setNumberOfShorttermPictures(numStrp); + rpl->setNumberOfLongtermPictures(numLtrp); + rpl->setNumberOfInterLayerPictures( numIlrp ); } -void HLSyntaxReader::parsePPS( PPS* pcPPS ) +void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetManager ) { #if ENABLE_TRACING xTracePPSHeader (); @@ -382,19 +418,167 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) CHECK(uiCode > 63, "PPS id exceeds boundary (63)"); pcPPS->setPPSId (uiCode); - READ_UVLC( uiCode, "pps_seq_parameter_set_id"); - CHECK(uiCode > 15, "SPS id exceeds boundary (15)"); + READ_CODE(4, uiCode, "pps_seq_parameter_set_id"); pcPPS->setSPSId (uiCode); -#if HEVC_DEPENDENT_SLICES - READ_FLAG( uiCode, "dependent_slice_segments_enabled_flag" ); pcPPS->setDependentSliceSegmentsEnabledFlag ( uiCode == 1 ); -#endif + READ_UVLC( uiCode, "pic_width_in_luma_samples" ); pcPPS->setPicWidthInLumaSamples( uiCode ); + READ_UVLC( uiCode, "pic_height_in_luma_samples" ); pcPPS->setPicHeightInLumaSamples( uiCode ); + + READ_FLAG( uiCode, "conformance_window_flag" ); + if( uiCode != 0 ) + { + Window &conf = pcPPS->getConformanceWindow(); + READ_UVLC( uiCode, "conf_win_left_offset" ); conf.setWindowLeftOffset( uiCode ); + READ_UVLC( uiCode, "conf_win_right_offset" ); conf.setWindowRightOffset( uiCode ); + READ_UVLC( uiCode, "conf_win_top_offset" ); conf.setWindowTopOffset( uiCode ); + READ_UVLC( uiCode, "conf_win_bottom_offset" ); conf.setWindowBottomOffset( uiCode ); + } + + READ_FLAG( uiCode, "scaling_window_flag" ); + if( uiCode != 0 ) + { + Window &scalingWindow = pcPPS->getScalingWindow(); + READ_UVLC( uiCode, "scaling_win_left_offset" ); scalingWindow.setWindowLeftOffset( uiCode ); + READ_UVLC( uiCode, "scaling_win_right_offset" ); scalingWindow.setWindowRightOffset( uiCode ); + READ_UVLC( uiCode, "scaling_win_top_offset" ); scalingWindow.setWindowTopOffset( uiCode ); + 
READ_UVLC( uiCode, "scaling_win_bottom_offset" ); scalingWindow.setWindowBottomOffset( uiCode ); + } READ_FLAG( uiCode, "output_flag_present_flag" ); pcPPS->setOutputFlagPresentFlag( uiCode==1 ); - READ_CODE(3, uiCode, "num_extra_slice_header_bits"); pcPPS->setNumExtraSliceHeaderBits(uiCode); + READ_FLAG(uiCode, "pps_subpic_id_signalling_present_flag"); pcPPS->setSubPicIdSignallingPresentFlag( uiCode != 0 ); + if( pcPPS->getSubPicIdSignallingPresentFlag() ) + { + READ_UVLC( uiCode, "pps_num_subpics_minus1" ); pcPPS->setNumSubPics( uiCode + 1 ); + CHECK( uiCode > MAX_NUM_SUB_PICS-1, "Number of sub-pictures exceeds limit"); + + READ_UVLC( uiCode, "pps_subpic_id_len_minus1" ); pcPPS->setSubPicIdLen( uiCode + 1 ); + CHECK( uiCode > 15, "Invalid pps_subpic_id_len_minus1 signalled"); + + for( int picIdx = 0; picIdx < pcPPS->getNumSubPics( ); picIdx++ ) + { + READ_CODE( pcPPS->getSubPicIdLen( ), uiCode, "pps_subpic_id[i]" ); pcPPS->setSubPicId( picIdx, uiCode ); + } + } + else + { + for( int picIdx = 0; picIdx < MAX_NUM_SUB_PICS; picIdx++ ) + { + pcPPS->setSubPicId( picIdx, picIdx ); + } + } + + + READ_FLAG( uiCode, "no_pic_partition_flag" ); pcPPS->setNoPicPartitionFlag( uiCode == 1 ); + if(!pcPPS->getNoPicPartitionFlag()) + { + int colIdx, rowIdx; + pcPPS->resetTileSliceInfo(); + + // CTU size - required to match size in SPS + READ_CODE(2, uiCode, "pps_log2_ctu_size_minus5"); pcPPS->setLog2CtuSize(uiCode + 5); + CHECK(uiCode > 2, "pps_log2_ctu_size_minus5 must be less than or equal to 2"); + + // number of explicit tile columns/rows + READ_UVLC( uiCode, "num_exp_tile_columns_minus1" ); pcPPS->setNumExpTileColumns( uiCode + 1 ); + READ_UVLC( uiCode, "num_exp_tile_rows_minus1" ); pcPPS->setNumExpTileRows( uiCode + 1 ); + CHECK(pcPPS->getNumExpTileColumns() > MAX_TILE_COLS, "Number of explicit tile columns exceeds valid range"); + CHECK(pcPPS->getNumExpTileRows() > MAX_TILE_ROWS, "Number of explicit tile rows exceeds valid range"); + + // tile sizes + for( colIdx = 0; 
colIdx < pcPPS->getNumExpTileColumns(); colIdx++ ) + { + READ_UVLC( uiCode, "tile_column_width_minus1[i]" ); pcPPS->addTileColumnWidth( uiCode + 1 ); + } + for( rowIdx = 0; rowIdx < pcPPS->getNumExpTileRows(); rowIdx++ ) + { + READ_UVLC( uiCode, "tile_row_height_minus1[i]" ); pcPPS->addTileRowHeight( uiCode + 1 ); + } + pcPPS->initTiles(); + + // rectangular slice signalling + READ_CODE(1, uiCode, "rect_slice_flag"); pcPPS->setRectSliceFlag( uiCode == 1 ); + if (pcPPS->getRectSliceFlag()) + { + READ_FLAG(uiCode, "single_slice_per_subpic_flag"); pcPPS->setSingleSlicePerSubPicFlag(uiCode == 1); + } + if (pcPPS->getRectSliceFlag() & !(pcPPS->getSingleSlicePerSubPicFlag())) + { + int32_t tileIdx = 0; + + READ_UVLC( uiCode, "num_slices_in_pic_minus1" ); pcPPS->setNumSlicesInPic( uiCode + 1 ); + CHECK(pcPPS->getNumSlicesInPic() > MAX_SLICES, "Number of slices in picture exceeds valid range"); + READ_CODE(1, uiCode, "tile_idx_delta_present_flag"); pcPPS->setTileIdxDeltaPresentFlag( uiCode == 1 ); + pcPPS->initRectSlices(); + + // read rectangular slice parameters + for( int i = 0; i < pcPPS->getNumSlicesInPic()-1; i++ ) + { + pcPPS->setSliceTileIdx( i, tileIdx ); + + // complete tiles within a single slice + READ_UVLC( uiCode, "slice_width_in_tiles_minus1[i]" ); pcPPS->setSliceWidthInTiles ( i, uiCode + 1 ); +#if JVET_Q0480_RASTER_RECT_SLICES + if( pcPPS->getTileIdxDeltaPresentFlag() || ( (tileIdx % pcPPS->getNumTileColumns()) == 0 ) ) + { + READ_UVLC( uiCode, "slice_height_in_tiles_minus1[i]" ); pcPPS->setSliceHeightInTiles( i, uiCode + 1 ); + } + else + { + pcPPS->setSliceHeightInTiles( i, pcPPS->getSliceHeightInTiles(i-1) ); + } +#else + READ_UVLC( uiCode, "slice_height_in_tiles_minus1[i]" ); pcPPS->setSliceHeightInTiles( i, uiCode + 1 ); +#endif + + // multiple slices within a single tile special case + if( pcPPS->getSliceWidthInTiles( i ) == 1 && pcPPS->getSliceHeightInTiles( i ) == 1 ) + { + READ_UVLC( uiCode, "num_slices_in_tile_minus1[i]" ); 
pcPPS->setNumSlicesInTile( i, uiCode + 1 ); + uint32_t numSlicesInTile = pcPPS->getNumSlicesInTile( i ); + for( int j = 0; j < numSlicesInTile-1; j++ ) + { + READ_UVLC( uiCode, "slice_height_in_ctu_minus1[i]" ); pcPPS->setSliceHeightInCtu( i, uiCode + 1 ); + i++; + pcPPS->setSliceWidthInTiles ( i, 1 ); + pcPPS->setSliceHeightInTiles( i, 1 ); + pcPPS->setNumSlicesInTile ( i, numSlicesInTile ); + pcPPS->setSliceTileIdx ( i, tileIdx ); + } + } + + // tile index offset to start of next slice + if( i < pcPPS->getNumSlicesInPic()-1 ) + { + if( pcPPS->getTileIdxDeltaPresentFlag() ) + { + int32_t tileIdxDelta; + READ_SVLC( tileIdxDelta, "tile_idx_delta[i]" ); + tileIdx += tileIdxDelta; + CHECK( tileIdx < 0 || tileIdx >= pcPPS->getNumTiles(), "Invalid tile_idx_delta."); + } + else + { + tileIdx += pcPPS->getSliceWidthInTiles( i ); + if( tileIdx % pcPPS->getNumTileColumns() == 0) + { + tileIdx += (pcPPS->getSliceHeightInTiles( i ) - 1) * pcPPS->getNumTileColumns(); + } + } + } + } + pcPPS->setSliceTileIdx(pcPPS->getNumSlicesInPic()-1, tileIdx ); + + // initialize mapping between rectangular slices and CTUs + pcPPS->initRectSliceMap(); + } + // loop filtering across slice/tile controls + READ_CODE(1, uiCode, "loop_filter_across_tiles_enabled_flag"); pcPPS->setLoopFilterAcrossTilesEnabledFlag( uiCode == 1 ); + READ_CODE(1, uiCode, "loop_filter_across_slices_enabled_flag"); pcPPS->setLoopFilterAcrossSlicesEnabledFlag( uiCode == 1 ); + } + READ_FLAG(uiCode, "entropy_coding_sync_enabled_flag"); pcPPS->setEntropyCodingSyncEnabledFlag(uiCode == 1); READ_FLAG( uiCode, "cabac_init_present_flag" ); pcPPS->setCabacInitPresentFlag( uiCode ? 
true : false ); READ_UVLC(uiCode, "num_ref_idx_l0_default_active_minus1"); @@ -405,21 +589,15 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) CHECK(uiCode > 14, "Invalid code read"); pcPPS->setNumRefIdxL1DefaultActive(uiCode+1); + READ_FLAG(uiCode, "rpl1_idx_present_flag"); + pcPPS->setRpl1IdxPresentFlag(uiCode); + + READ_SVLC(iCode, "init_qp_minus26" ); pcPPS->setPicInitQPMinus26(iCode); - READ_FLAG( uiCode, "constrained_intra_pred_flag" ); pcPPS->setConstrainedIntraPred( uiCode ? true : false ); - READ_FLAG( uiCode, "transform_skip_enabled_flag" ); - pcPPS->setUseTransformSkip ( uiCode ? true : false ); + READ_UVLC(uiCode, "log2_transform_skip_max_size_minus2"); + pcPPS->setLog2MaxTransformSkipBlockSize(uiCode + 2); READ_FLAG( uiCode, "cu_qp_delta_enabled_flag" ); pcPPS->setUseDQP( uiCode ? true : false ); - if( pcPPS->getUseDQP() ) - { - READ_UVLC( uiCode, "cu_qp_delta_subdiv" ); - pcPPS->setCuQpDeltaSubdiv( uiCode ); - } - else - { - pcPPS->setCuQpDeltaSubdiv( 0 ); - } READ_SVLC( iCode, "pps_cb_qp_offset"); pcPPS->setQpOffset(COMPONENT_Cb, iCode); CHECK( pcPPS->getQpOffset(COMPONENT_Cb) < -12, "Invalid Cb QP offset" ); @@ -430,62 +608,67 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) CHECK( pcPPS->getQpOffset(COMPONENT_Cr) < -12, "Invalid Cr QP offset" ); CHECK( pcPPS->getQpOffset(COMPONENT_Cr) > 12, "Invalid Cr QP offset" ); + READ_FLAG(uiCode, "pps_joint_cbcr_qp_offset_present_flag"); + pcPPS->setJointCbCrQpOffsetPresentFlag(uiCode ? true : false); + + if (pcPPS->getJointCbCrQpOffsetPresentFlag()) + { + READ_SVLC(iCode, "pps_joint_cbcr_qp_offset_value"); + } + else + { + iCode = 0; + } + pcPPS->setQpOffset(JOINT_CbCr, iCode); + + CHECK( pcPPS->getQpOffset(JOINT_CbCr) < -12, "Invalid CbCr QP offset" ); + CHECK( pcPPS->getQpOffset(JOINT_CbCr) > 12, "Invalid CbCr QP offset" ); + CHECK(MAX_NUM_COMPONENT>3, "Invalid maximal number of components"); READ_FLAG( uiCode, "pps_slice_chroma_qp_offsets_present_flag" ); pcPPS->setSliceChromaQpFlag( uiCode ? 
true : false ); - READ_FLAG( uiCode, "weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) - pcPPS->setUseWP( uiCode==1 ); - READ_FLAG( uiCode, "weighted_bipred_flag" ); // Use of Bi-Directional Weighting Prediction (B_SLICE) - pcPPS->setWPBiPred( uiCode==1 ); - - READ_FLAG( uiCode, "transquant_bypass_enabled_flag"); - pcPPS->setTransquantBypassEnabledFlag(uiCode ? true : false); -#if HEVC_TILES_WPP - READ_FLAG( uiCode, "tiles_enabled_flag" ); pcPPS->setTilesEnabledFlag( uiCode == 1 ); -#endif -#if HEVC_TILES_WPP - READ_FLAG( uiCode, "entropy_coding_sync_enabled_flag" ); pcPPS->setEntropyCodingSyncEnabledFlag( uiCode == 1 ); - - if( pcPPS->getTilesEnabledFlag() ) + READ_FLAG( uiCode, "pps_cu_chroma_qp_offset_enabled_flag"); + if (uiCode == 0) { - READ_UVLC ( uiCode, "num_tile_columns_minus1" ); pcPPS->setNumTileColumnsMinus1( uiCode ); - READ_UVLC ( uiCode, "num_tile_rows_minus1" ); pcPPS->setNumTileRowsMinus1( uiCode ); - READ_FLAG ( uiCode, "uniform_spacing_flag" ); pcPPS->setTileUniformSpacingFlag( uiCode == 1 ); - - const uint32_t tileColumnsMinus1 = pcPPS->getNumTileColumnsMinus1(); - const uint32_t tileRowsMinus1 = pcPPS->getNumTileRowsMinus1(); + pcPPS->clearChromaQpOffsetList(); + } + else + { + uint32_t tableSizeMinus1 = 0; + READ_UVLC(tableSizeMinus1, "chroma_qp_offset_list_len_minus1"); + CHECK(tableSizeMinus1 >= MAX_QP_OFFSET_LIST_SIZE, "Table size exceeds maximum"); - if ( !pcPPS->getTileUniformSpacingFlag()) + for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx <= (tableSizeMinus1); cuChromaQpOffsetIdx++) { - if (tileColumnsMinus1 > 0) + int cbOffset; + int crOffset; + int jointCbCrOffset; + READ_SVLC(cbOffset, "cb_qp_offset_list[i]"); + CHECK(cbOffset < -12 || cbOffset > 12, "Invalid chroma QP offset"); + READ_SVLC(crOffset, "cr_qp_offset_list[i]"); + CHECK(crOffset < -12 || crOffset > 12, "Invalid chroma QP offset"); + if (pcPPS->getJointCbCrQpOffsetPresentFlag()) { - std::vector<int> columnWidth(tileColumnsMinus1); - for(uint32_t i = 
0; i < tileColumnsMinus1; i++) - { - READ_UVLC( uiCode, "column_width_minus1" ); - columnWidth[i] = uiCode+1; - } - pcPPS->setTileColumnWidth(columnWidth); + READ_SVLC(jointCbCrOffset, "joint_cbcr_qp_offset_list[i]"); } - - if (tileRowsMinus1 > 0) + else { - std::vector<int> rowHeight (tileRowsMinus1); - for(uint32_t i = 0; i < tileRowsMinus1; i++) - { - READ_UVLC( uiCode, "row_height_minus1" ); - rowHeight[i] = uiCode + 1; - } - pcPPS->setTileRowHeight(rowHeight); + jointCbCrOffset = 0; } + CHECK(jointCbCrOffset < -12 || jointCbCrOffset > 12, "Invalid chroma QP offset"); + // table uses +1 for index (see comment inside the function) + pcPPS->setChromaQpOffsetListEntry(cuChromaQpOffsetIdx + 1, cbOffset, crOffset, jointCbCrOffset); } - CHECK((tileColumnsMinus1 + tileRowsMinus1) == 0, "Invalid tile configuration"); - READ_FLAG ( uiCode, "loop_filter_across_tiles_enabled_flag" ); pcPPS->setLoopFilterAcrossTilesEnabledFlag( uiCode ? true : false ); + CHECK(pcPPS->getChromaQpOffsetListLen() != tableSizeMinus1 + 1, "Invalid chroma QP offset list length"); } -#endif - READ_FLAG( uiCode, "pps_loop_filter_across_slices_enabled_flag" ); pcPPS->setLoopFilterAcrossSlicesEnabledFlag( uiCode ? true : false ); + + READ_FLAG( uiCode, "weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) + pcPPS->setUseWP( uiCode==1 ); + READ_FLAG( uiCode, "weighted_bipred_flag" ); // Use of Bi-Directional Weighting Prediction (B_SLICE) + pcPPS->setWPBiPred( uiCode==1 ); + READ_FLAG( uiCode, "deblocking_filter_control_present_flag" ); pcPPS->setDeblockingFilterControlPresentFlag( uiCode ? true : false ); if(pcPPS->getDeblockingFilterControlPresentFlag()) { @@ -497,21 +680,31 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) READ_SVLC ( iCode, "pps_tc_offset_div2" ); pcPPS->setDeblockingFilterTcOffsetDiv2( iCode ); } } -#if HEVC_USE_SCALING_LISTS - READ_FLAG( uiCode, "pps_scaling_list_data_present_flag" ); pcPPS->setScalingListPresentFlag( uiCode ? 
true : false ); - if(pcPPS->getScalingListPresentFlag ()) + READ_FLAG( uiCode, "constant_slice_header_params_enabled_flag"); pcPPS->setConstantSliceHeaderParamsEnabledFlag(uiCode); + if ( pcPPS->getConstantSliceHeaderParamsEnabledFlag() ) { + READ_CODE( 2, uiCode, "pps_dep_quant_enabled_idc"); pcPPS->setPPSDepQuantEnabledIdc(uiCode); + READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[0]"); pcPPS->setPPSRefPicListSPSIdc0(uiCode); + READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[1]"); pcPPS->setPPSRefPicListSPSIdc1(uiCode); + READ_CODE( 2, uiCode, "pps_mvd_l1_zero_idc"); pcPPS->setPPSMvdL1ZeroIdc(uiCode); + READ_CODE( 2, uiCode, "pps_collocated_from_l0_idc"); pcPPS->setPPSCollocatedFromL0Idc(uiCode); + READ_UVLC( uiCode, "pps_six_minus_max_num_merge_cand_plus1"); pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(uiCode); + READ_UVLC( uiCode, "pps_max_num_merge_cand_minus_max_num_triangle_cand_plus1");pcPPS->setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(uiCode); + } + else { - parseScalingList( &(pcPPS->getScalingList()) ); + pcPPS->setPPSDepQuantEnabledIdc(0); + pcPPS->setPPSRefPicListSPSIdc0(0); + pcPPS->setPPSRefPicListSPSIdc1(0); + pcPPS->setPPSMvdL1ZeroIdc(0); + pcPPS->setPPSCollocatedFromL0Idc(0); + pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(0); + pcPPS->setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(0); } -#endif - READ_FLAG( uiCode, "lists_modification_present_flag"); - pcPPS->setListsModificationPresentFlag(uiCode); - READ_UVLC( uiCode, "log2_parallel_merge_level_minus2"); - pcPPS->setLog2ParallelMergeLevelMinus2 (uiCode); - - READ_FLAG( uiCode, "slice_segment_header_extension_present_flag"); + READ_FLAG( uiCode, "picture_header_extension_present_flag"); + pcPPS->setPictureHeaderExtensionPresentFlag(uiCode); + READ_FLAG( uiCode, "slice_header_extension_present_flag"); pcPPS->setSliceHeaderExtensionPresentFlag(uiCode); @@ -548,42 +741,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) PPSRExt &ppsRangeExtension = pcPPS->getPpsRangeExtension(); 
CHECK(bSkipTrailingExtensionBits, "Invalid state"); - if (pcPPS->getUseTransformSkip()) - { - READ_UVLC( uiCode, "log2_max_transform_skip_block_size_minus2"); - ppsRangeExtension.setLog2MaxTransformSkipBlockSize(uiCode+2); - } - READ_FLAG( uiCode, "cross_component_prediction_enabled_flag"); ppsRangeExtension.setCrossComponentPredictionEnabledFlag(uiCode != 0); - READ_FLAG( uiCode, "chroma_qp_offset_list_enabled_flag"); - if (uiCode == 0) - { - ppsRangeExtension.clearChromaQpOffsetList(); - ppsRangeExtension.setCuChromaQpOffsetSubdiv(0); - } - else - { - READ_UVLC(uiCode, "cu_chroma_qp_offset_subdiv"); ppsRangeExtension.setCuChromaQpOffsetSubdiv(uiCode); - uint32_t tableSizeMinus1 = 0; - READ_UVLC(tableSizeMinus1, "chroma_qp_offset_list_len_minus1"); - CHECK(tableSizeMinus1 >= MAX_QP_OFFSET_LIST_SIZE, "Table size exceeds maximum"); - - for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx <= (tableSizeMinus1); cuChromaQpOffsetIdx++) - { - int cbOffset; - int crOffset; - READ_SVLC(cbOffset, "cb_qp_offset_list[i]"); - CHECK(cbOffset < -12 || cbOffset > 12, "Invalid chroma QP offset"); - READ_SVLC(crOffset, "cr_qp_offset_list[i]"); - CHECK(crOffset < -12 || crOffset > 12, "Invalid chroma QP offset"); - // table uses +1 for index (see comment inside the function) - ppsRangeExtension.setChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1, cbOffset, crOffset); - } - CHECK(ppsRangeExtension.getChromaQpOffsetListLen() != tableSizeMinus1 + 1, "Invalid chroma QP offset list lenght"); - } - READ_UVLC( uiCode, "log2_sao_offset_scale_luma"); ppsRangeExtension.setLog2SaoOffsetScale(CHANNEL_TYPE_LUMA, uiCode); READ_UVLC( uiCode, "log2_sao_offset_scale_chroma"); @@ -607,7 +767,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS ) xReadRbspTrailingBits(); } -void HLSyntaxReader::parseAPS(APS* aps) +void HLSyntaxReader::parseAPS( APS* aps ) { #if ENABLE_TRACING xTraceAPSHeader(); @@ -618,37 +778,134 @@ void HLSyntaxReader::parseAPS(APS* aps) READ_CODE(5, code, "adaptation_parameter_set_id"); 
aps->setAPSId(code); - AlfSliceParam param = aps->getAlfAPSParam(); - param.enabledFlag[COMPONENT_Y] = true; + READ_CODE(3, code, "aps_params_type"); + aps->setAPSType( ApsType(code) ); + if( code == ALF_APS ) + { + parseAlfAps( aps ); + } + else if( code == LMCS_APS ) + { + parseLmcsAps( aps ); + } + else if( code == SCALING_LIST_APS ) + { + parseScalingListAps( aps ); + } + READ_FLAG(code, "aps_extension_flag"); + if (code) + { + while (xMoreRbspData()) + { + READ_FLAG(code, "aps_extension_data_flag"); + } + } + xReadRbspTrailingBits(); +} + +void HLSyntaxReader::parseAlfAps( APS* aps ) +{ + uint32_t code; - int alfChromaIdc = truncatedUnaryEqProb(3); //alf_chroma_idc - param.enabledFlag[COMPONENT_Cb] = alfChromaIdc >> 1; - param.enabledFlag[COMPONENT_Cr] = alfChromaIdc & 1; + AlfParam param = aps->getAlfAPSParam(); + param.reset(); + param.enabledFlag[COMPONENT_Y] = param.enabledFlag[COMPONENT_Cb] = param.enabledFlag[COMPONENT_Cr] = true; + READ_FLAG(code, "alf_luma_new_filter"); + param.newFilterFlag[CHANNEL_TYPE_LUMA] = code; + READ_FLAG(code, "alf_chroma_new_filter"); + param.newFilterFlag[CHANNEL_TYPE_CHROMA] = code; - xReadTruncBinCode(code, MAX_NUM_ALF_CLASSES); //number_of_filters_minus1 - param.numLumaFilters = code + 1; - if (param.numLumaFilters > 1) + CHECK(param.newFilterFlag[CHANNEL_TYPE_LUMA] == 0 && param.newFilterFlag[CHANNEL_TYPE_CHROMA] == 0, + "bitstream conformance error, alf_luma_filter_signal_flag and alf_chroma_filter_signal_flag shall not equal to zero at the same time"); + + if (param.newFilterFlag[CHANNEL_TYPE_LUMA]) { - for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) + READ_FLAG(code, "alf_luma_clip"); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + param.nonLinearFlag[CHANNEL_TYPE_LUMA] = code ? true : false; +#else + param.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = code ? 
true : false; +#endif + READ_UVLC(code, "alf_luma_num_filters_signalled_minus1"); + param.numLumaFilters = code + 1; + if (param.numLumaFilters > 1) + { + const int length = ceilLog2(param.numLumaFilters); + for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) + { + READ_CODE(length, code, "alf_luma_coeff_delta_idx"); + param.filterCoeffDeltaIdx[i] = code; + } + } + else { - xReadTruncBinCode(code, param.numLumaFilters); - param.filterCoeffDeltaIdx[i] = code; + memset(param.filterCoeffDeltaIdx, 0, sizeof(param.filterCoeffDeltaIdx)); } + alfFilter( param, false, 0 ); } - else + if (param.newFilterFlag[CHANNEL_TYPE_CHROMA]) { - memset(param.filterCoeffDeltaIdx, 0, sizeof(param.filterCoeffDeltaIdx)); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + READ_FLAG(code, "alf_nonlinear_enable_flag_chroma"); + param.nonLinearFlag[CHANNEL_TYPE_CHROMA] = code ? true : false; +#endif + + if( MAX_NUM_ALF_ALTERNATIVES_CHROMA > 1 ) + READ_UVLC( code, "alf_chroma_num_alts_minus1" ); + else + code = 0; + + param.numAlternativesChroma = code + 1; + + for( int altIdx=0; altIdx < param.numAlternativesChroma; ++altIdx ) + { +#if !JVET_Q0249_ALF_CHROMA_CLIPFLAG + READ_FLAG(code, "alf_nonlinear_enable_flag_chroma"); + param.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] = code ? 
true : false; +#endif + alfFilter( param, true, altIdx ); + } } + aps->setAlfAPSParam(param); +} - alfFilter(param, false); +void HLSyntaxReader::parseLmcsAps( APS* aps ) +{ + uint32_t code; - if (alfChromaIdc) + SliceReshapeInfo& info = aps->getReshaperAPSInfo(); + memset(info.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int)); + READ_UVLC(code, "lmcs_min_bin_idx"); info.reshaperModelMinBinIdx = code; + READ_UVLC(code, "lmcs_delta_max_bin_idx"); info.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1 - code; + READ_UVLC(code, "lmcs_delta_cw_prec_minus1"); info.maxNbitsNeededDeltaCW = code + 1; + assert(info.maxNbitsNeededDeltaCW > 0); + for (uint32_t i = info.reshaperModelMinBinIdx; i <= info.reshaperModelMaxBinIdx; i++) { - alfFilter(param, true); + READ_CODE(info.maxNbitsNeededDeltaCW, code, "lmcs_delta_abs_cw[ i ]"); + int absCW = code; + if (absCW > 0) + { + READ_CODE(1, code, "lmcs_delta_sign_cw_flag[ i ]"); + } + int signCW = code; + info.reshaperModelBinCWDelta[i] = (1 - 2 * signCW) * absCW; } - aps->setAlfAPSParam(param); + READ_CODE(3, code, "lmcs_delta_abs_crs"); + int absCW = code; + if (absCW > 0) + { + READ_CODE(1, code, "lmcs_delta_sign_crs_flag"); + } + int signCW = code; + info.chrResScalingOffset = (1 - 2 * signCW) * absCW; - xReadRbspTrailingBits(); + aps->setReshaperAPSInfo(info); +} + +void HLSyntaxReader::parseScalingListAps( APS* aps ) +{ + ScalingList& info = aps->getScalingList(); + parseScalingList( &info ); } void HLSyntaxReader::parseVUI(VUI* pcVUI, SPS *pcSPS) @@ -656,212 +913,136 @@ void HLSyntaxReader::parseVUI(VUI* pcVUI, SPS *pcSPS) #if ENABLE_TRACING DTRACE( g_trace_ctx, D_HEADER, "----------- vui_parameters -----------\n"); #endif - uint32_t uiCode; - READ_FLAG( uiCode, "aspect_ratio_info_present_flag"); pcVUI->setAspectRatioInfoPresentFlag(uiCode); + + uint32_t symbol; + + READ_FLAG( symbol, "aspect_ratio_info_present_flag"); pcVUI->setAspectRatioInfoPresentFlag(symbol); if (pcVUI->getAspectRatioInfoPresentFlag()) { - 
READ_CODE(8, uiCode, "aspect_ratio_idc"); pcVUI->setAspectRatioIdc(uiCode); + READ_FLAG( symbol, "aspect_ratio_constant_flag"); pcVUI->setAspectRatioConstantFlag(symbol); + READ_CODE(8, symbol, "aspect_ratio_idc"); pcVUI->setAspectRatioIdc(symbol); if (pcVUI->getAspectRatioIdc() == 255) { - READ_CODE(16, uiCode, "sar_width"); pcVUI->setSarWidth(uiCode); - READ_CODE(16, uiCode, "sar_height"); pcVUI->setSarHeight(uiCode); + READ_CODE(16, symbol, "sar_width"); pcVUI->setSarWidth(symbol); + READ_CODE(16, symbol, "sar_height"); pcVUI->setSarHeight(symbol); } } - READ_FLAG( uiCode, "overscan_info_present_flag"); pcVUI->setOverscanInfoPresentFlag(uiCode); - if (pcVUI->getOverscanInfoPresentFlag()) + READ_FLAG( symbol, "colour_description_present_flag"); pcVUI->setColourDescriptionPresentFlag(symbol); + if (pcVUI->getColourDescriptionPresentFlag()) { - READ_FLAG( uiCode, "overscan_appropriate_flag"); pcVUI->setOverscanAppropriateFlag(uiCode); + READ_CODE(8, symbol, "colour_primaries"); pcVUI->setColourPrimaries(symbol); + READ_CODE(8, symbol, "transfer_characteristics"); pcVUI->setTransferCharacteristics(symbol); + READ_CODE(8, symbol, "matrix_coeffs"); pcVUI->setMatrixCoefficients(symbol); + READ_FLAG( symbol, "video_full_range_flag"); pcVUI->setVideoFullRangeFlag(symbol); } - READ_FLAG( uiCode, "video_signal_type_present_flag"); pcVUI->setVideoSignalTypePresentFlag(uiCode); - if (pcVUI->getVideoSignalTypePresentFlag()) - { - READ_CODE(3, uiCode, "video_format"); pcVUI->setVideoFormat(uiCode); - READ_FLAG( uiCode, "video_full_range_flag"); pcVUI->setVideoFullRangeFlag(uiCode); - READ_FLAG( uiCode, "colour_description_present_flag"); pcVUI->setColourDescriptionPresentFlag(uiCode); - if (pcVUI->getColourDescriptionPresentFlag()) - { - READ_CODE(8, uiCode, "colour_primaries"); pcVUI->setColourPrimaries(uiCode); - READ_CODE(8, uiCode, "transfer_characteristics"); pcVUI->setTransferCharacteristics(uiCode); - READ_CODE(8, uiCode, "matrix_coeffs"); 
pcVUI->setMatrixCoefficients(uiCode); - } - } + READ_FLAG( symbol, "field_seq_flag"); pcVUI->setFieldSeqFlag(symbol); - READ_FLAG( uiCode, "chroma_loc_info_present_flag"); pcVUI->setChromaLocInfoPresentFlag(uiCode); + READ_FLAG( symbol, "chroma_loc_info_present_flag"); pcVUI->setChromaLocInfoPresentFlag(symbol); if (pcVUI->getChromaLocInfoPresentFlag()) { - READ_UVLC( uiCode, "chroma_sample_loc_type_top_field" ); pcVUI->setChromaSampleLocTypeTopField(uiCode); - READ_UVLC( uiCode, "chroma_sample_loc_type_bottom_field" ); pcVUI->setChromaSampleLocTypeBottomField(uiCode); - } - - READ_FLAG( uiCode, "neutral_chroma_indication_flag"); pcVUI->setNeutralChromaIndicationFlag(uiCode); - - READ_FLAG( uiCode, "field_seq_flag"); pcVUI->setFieldSeqFlag(uiCode); - - READ_FLAG(uiCode, "frame_field_info_present_flag"); pcVUI->setFrameFieldInfoPresentFlag(uiCode); - - READ_FLAG( uiCode, "default_display_window_flag"); - if (uiCode != 0) - { - Window &defDisp = pcVUI->getDefaultDisplayWindow(); - READ_UVLC( uiCode, "def_disp_win_left_offset" ); defDisp.setWindowLeftOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc()) ); - READ_UVLC( uiCode, "def_disp_win_right_offset" ); defDisp.setWindowRightOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc()) ); - READ_UVLC( uiCode, "def_disp_win_top_offset" ); defDisp.setWindowTopOffset ( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc()) ); - READ_UVLC( uiCode, "def_disp_win_bottom_offset" ); defDisp.setWindowBottomOffset( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc()) ); - } - - TimingInfo *timingInfo = pcVUI->getTimingInfo(); - READ_FLAG( uiCode, "vui_timing_info_present_flag"); timingInfo->setTimingInfoPresentFlag (uiCode ? 
true : false); - if(timingInfo->getTimingInfoPresentFlag()) - { - READ_CODE( 32, uiCode, "vui_num_units_in_tick"); timingInfo->setNumUnitsInTick (uiCode); - READ_CODE( 32, uiCode, "vui_time_scale"); timingInfo->setTimeScale (uiCode); - READ_FLAG( uiCode, "vui_poc_proportional_to_timing_flag"); timingInfo->setPocProportionalToTimingFlag(uiCode ? true : false); - if(timingInfo->getPocProportionalToTimingFlag()) + if(pcVUI->getFieldSeqFlag()) { - READ_UVLC( uiCode, "vui_num_ticks_poc_diff_one_minus1"); timingInfo->setNumTicksPocDiffOneMinus1 (uiCode); + READ_UVLC( symbol, "chroma_sample_loc_type_top_field" ); pcVUI->setChromaSampleLocTypeTopField(symbol); + READ_UVLC( symbol, "chroma_sample_loc_type_bottom_field" ); pcVUI->setChromaSampleLocTypeBottomField(symbol); } - - READ_FLAG( uiCode, "vui_hrd_parameters_present_flag"); pcVUI->setHrdParametersPresentFlag(uiCode); - if( pcVUI->getHrdParametersPresentFlag() ) + else { - parseHrdParameters( pcVUI->getHrdParameters(), 1, pcSPS->getMaxTLayers() - 1 ); + READ_UVLC( symbol, "chroma_sample_loc_type" ); pcVUI->setChromaSampleLocType(symbol); } } - READ_FLAG( uiCode, "bitstream_restriction_flag"); pcVUI->setBitstreamRestrictionFlag(uiCode); - if (pcVUI->getBitstreamRestrictionFlag()) + READ_FLAG( symbol, "overscan_info_present_flag"); pcVUI->setOverscanInfoPresentFlag(symbol); + if (pcVUI->getOverscanInfoPresentFlag()) { -#if HEVC_TILES_WPP - READ_FLAG( uiCode, "tiles_fixed_structure_flag"); pcVUI->setTilesFixedStructureFlag(uiCode); -#endif - READ_FLAG( uiCode, "motion_vectors_over_pic_boundaries_flag"); pcVUI->setMotionVectorsOverPicBoundariesFlag(uiCode); - READ_FLAG( uiCode, "restricted_ref_pic_lists_flag"); pcVUI->setRestrictedRefPicListsFlag(uiCode); - READ_UVLC( uiCode, "min_spatial_segmentation_idc"); pcVUI->setMinSpatialSegmentationIdc(uiCode); - CHECK(uiCode >= 4096, "Invalid code signalled"); - READ_UVLC( uiCode, "max_bytes_per_pic_denom" ); pcVUI->setMaxBytesPerPicDenom(uiCode); - READ_UVLC( uiCode, 
"max_bits_per_min_cu_denom" ); pcVUI->setMaxBitsPerMinCuDenom(uiCode); - READ_UVLC( uiCode, "log2_max_mv_length_horizontal" ); pcVUI->setLog2MaxMvLengthHorizontal(uiCode); - READ_UVLC( uiCode, "log2_max_mv_length_vertical" ); pcVUI->setLog2MaxMvLengthVertical(uiCode); + READ_FLAG( symbol, "overscan_appropriate_flag"); pcVUI->setOverscanAppropriateFlag(symbol); } } -void HLSyntaxReader::parseHrdParameters(HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1) +void HLSyntaxReader::parseHrdParameters(HRDParameters *hrd, uint32_t firstSubLayer, uint32_t maxNumSubLayersMinus1) { - uint32_t uiCode; - if( commonInfPresentFlag ) + uint32_t symbol; + READ_FLAG( symbol, "general_nal_hrd_parameters_present_flag" ); hrd->setNalHrdParametersPresentFlag( symbol == 1 ? true : false ); + READ_FLAG( symbol, "general_vcl_hrd_parameters_present_flag" ); hrd->setVclHrdParametersPresentFlag( symbol == 1 ? true : false ); + READ_FLAG( symbol, "general_decoding_unit_hrd_params_present_flag" ); hrd->setGeneralDecodingUnitHrdParamsPresentFlag( symbol == 1 ? true : false ); + + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) { - READ_FLAG( uiCode, "nal_hrd_parameters_present_flag" ); hrd->setNalHrdParametersPresentFlag( uiCode == 1 ? true : false ); - READ_FLAG( uiCode, "vcl_hrd_parameters_present_flag" ); hrd->setVclHrdParametersPresentFlag( uiCode == 1 ? true : false ); - if( hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() ) - { - READ_FLAG( uiCode, "sub_pic_hrd_params_present_flag" ); hrd->setSubPicCpbParamsPresentFlag( uiCode == 1 ? true : false ); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - READ_CODE( 8, uiCode, "tick_divisor_minus2" ); hrd->setTickDivisorMinus2( uiCode ); - READ_CODE( 5, uiCode, "du_cpb_removal_delay_increment_length_minus1" ); hrd->setDuCpbRemovalDelayLengthMinus1( uiCode ); - READ_FLAG( uiCode, "sub_pic_cpb_params_in_pic_timing_sei_flag" ); hrd->setSubPicCpbParamsInPicTimingSEIFlag( uiCode == 1 ? 
true : false ); - READ_CODE( 5, uiCode, "dpb_output_delay_du_length_minus1" ); hrd->setDpbOutputDelayDuLengthMinus1( uiCode ); - } - READ_CODE( 4, uiCode, "bit_rate_scale" ); hrd->setBitRateScale( uiCode ); - READ_CODE( 4, uiCode, "cpb_size_scale" ); hrd->setCpbSizeScale( uiCode ); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - READ_CODE( 4, uiCode, "cpb_size_du_scale" ); hrd->setDuCpbSizeScale( uiCode ); - } - READ_CODE( 5, uiCode, "initial_cpb_removal_delay_length_minus1" ); hrd->setInitialCpbRemovalDelayLengthMinus1( uiCode ); - READ_CODE( 5, uiCode, "au_cpb_removal_delay_length_minus1" ); hrd->setCpbRemovalDelayLengthMinus1( uiCode ); - READ_CODE( 5, uiCode, "dpb_output_delay_length_minus1" ); hrd->setDpbOutputDelayLengthMinus1( uiCode ); - } + READ_CODE( 8, symbol, "tick_divisor_minus2" ); hrd->setTickDivisorMinus2( symbol ); + } + READ_CODE( 4, symbol, "bit_rate_scale" ); hrd->setBitRateScale( symbol ); + READ_CODE( 4, symbol, "cpb_size_scale" ); hrd->setCpbSizeScale( symbol ); + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) + { + READ_CODE( 4, symbol, "cpb_size_du_scale" ); hrd->setCpbSizeDuScale( symbol ); } - int i, j, nalOrVcl; - for( i = 0; i <= maxNumSubLayersMinus1; i ++ ) + + for( int i = firstSubLayer; i <= maxNumSubLayersMinus1; i ++ ) { - READ_FLAG( uiCode, "fixed_pic_rate_general_flag" ); hrd->setFixedPicRateFlag( i, uiCode == 1 ? true : false ); + READ_FLAG( symbol, "fixed_pic_rate_general_flag" ); hrd->setFixedPicRateFlag( i, symbol == 1 ? true : false ); if( !hrd->getFixedPicRateFlag( i ) ) { - READ_FLAG( uiCode, "fixed_pic_rate_within_cvs_flag" ); hrd->setFixedPicRateWithinCvsFlag( i, uiCode == 1 ? true : false ); + READ_FLAG( symbol, "fixed_pic_rate_within_cvs_flag" ); hrd->setFixedPicRateWithinCvsFlag( i, symbol == 1 ? 
true : false ); } else { hrd->setFixedPicRateWithinCvsFlag( i, true ); } - hrd->setLowDelayHrdFlag( i, 0 ); // Infered to be 0 when not present - hrd->setCpbCntMinus1 ( i, 0 ); // Infered to be 0 when not present + hrd->setLowDelayHrdFlag( i, false ); // Inferred to be 0 when not present + hrd->setCpbCntMinus1 ( i, 0 ); // Inferred to be 0 when not present if( hrd->getFixedPicRateWithinCvsFlag( i ) ) { - READ_UVLC( uiCode, "elemental_duration_in_tc_minus1" ); hrd->setPicDurationInTcMinus1( i, uiCode ); + READ_UVLC( symbol, "elemental_duration_in_tc_minus1" ); hrd->setPicDurationInTcMinus1( i, symbol ); } else { - READ_FLAG( uiCode, "low_delay_hrd_flag" ); hrd->setLowDelayHrdFlag( i, uiCode == 1 ? true : false ); + READ_FLAG( symbol, "low_delay_hrd_flag" ); hrd->setLowDelayHrdFlag( i, symbol == 1 ? true : false ); } if (!hrd->getLowDelayHrdFlag( i )) { - READ_UVLC( uiCode, "cpb_cnt_minus1" ); hrd->setCpbCntMinus1( i, uiCode ); + READ_UVLC( symbol, "cpb_cnt_minus1" ); hrd->setCpbCntMinus1( i, symbol ); } - for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) + for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) { if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) || ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) ) { - for( j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ ) + for( int j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ ) { - READ_UVLC( uiCode, "bit_rate_value_minus1" ); hrd->setBitRateValueMinus1( i, j, nalOrVcl, uiCode ); - READ_UVLC( uiCode, "cpb_size_value_minus1" ); hrd->setCpbSizeValueMinus1( i, j, nalOrVcl, uiCode ); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - READ_UVLC( uiCode, "cpb_size_du_value_minus1" ); hrd->setDuCpbSizeValueMinus1( i, j, nalOrVcl, uiCode ); - READ_UVLC( uiCode, "bit_rate_du_value_minus1" ); hrd->setDuBitRateValueMinus1( i, j, nalOrVcl, uiCode ); - } - READ_FLAG( uiCode, "cbr_flag" ); hrd->setCbrFlag( i, j, nalOrVcl, uiCode == 1 ? 
true : false ); + READ_UVLC( symbol, "bit_rate_value_minus1" ); hrd->setBitRateValueMinus1( i, j, nalOrVcl, symbol ); + READ_UVLC( symbol, "cpb_size_value_minus1" ); hrd->setCpbSizeValueMinus1( i, j, nalOrVcl, symbol ); + READ_FLAG( symbol, "cbr_flag" ); hrd->setCbrFlag( i, j, nalOrVcl, symbol == 1 ? true : false ); } } } } -} - -void HLSyntaxReader::parseReshaper(SliceReshapeInfo& info, const SPS* pcSPS, const bool isIntra) -{ - unsigned symbol = 0; - READ_FLAG(symbol, "tile_group_reshaper_model_present_flag"); info.setSliceReshapeModelPresentFlag(symbol == 1); - if (info.getSliceReshapeModelPresentFlag()) + for (int i = 0; i < firstSubLayer; i++) { - memset(info.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int)); - READ_UVLC(symbol, "reshaper_model_min_bin_idx"); info.reshaperModelMinBinIdx = symbol; - READ_UVLC(symbol, "reshaper_model_delta_max_bin_idx"); info.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1 - symbol; - READ_UVLC(symbol, "reshaper_model_bin_delta_abs_cw_prec_minus1"); info.maxNbitsNeededDeltaCW = symbol + 1; - assert(info.maxNbitsNeededDeltaCW > 0); - for (uint32_t i = info.reshaperModelMinBinIdx; i <= info.reshaperModelMaxBinIdx; i++) + for (int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl++) { - READ_CODE(info.maxNbitsNeededDeltaCW, symbol, "reshaper_model_bin_delta_abs_CW"); - int absCW = symbol; - if (absCW > 0) + if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) || + ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) ) { - READ_CODE(1, symbol, "reshaper_model_bin_delta_sign_CW_flag"); + for (int j = 0; j <= (hrd->getCpbCntMinus1(i)); j++) + { + uint32_t bitRate = hrd->getBitRateValueMinus1(maxNumSubLayersMinus1, j, nalOrVcl); + hrd->setBitRateValueMinus1(i, j, nalOrVcl, bitRate); + uint32_t cpbSize = hrd->getCpbSizeValueMinus1(maxNumSubLayersMinus1, j, nalOrVcl); + hrd->setCpbSizeValueMinus1(i, j, nalOrVcl, cpbSize); + bool flag = hrd->getCbrFlag(maxNumSubLayersMinus1, j, nalOrVcl); + hrd->setCbrFlag(i, j, 
nalOrVcl, flag); + } } - int signCW = symbol; - info.reshaperModelBinCWDelta[i] = (1 - 2 * signCW) * absCW; - } - } - READ_FLAG(symbol, "tile_group_reshaper_enable_flag"); info.setUseSliceReshaper(symbol == 1); - if (info.getUseSliceReshaper()) - { - if (!(pcSPS->getUseDualITree() && isIntra)) - { - READ_FLAG(symbol, "slice_reshaper_ChromaAdj"); info.setSliceReshapeChromaAdj(symbol); - } - else - { - info.setSliceReshapeChromaAdj(0); } } } + + void HLSyntaxReader::parseSPS(SPS* pcSPS) { uint32_t uiCode; @@ -869,49 +1050,8 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) #if ENABLE_TRACING xTraceSPSHeader (); #endif -#if HEVC_VPS - READ_CODE( 4, uiCode, "sps_video_parameter_set_id"); pcSPS->setVPSId ( uiCode ); -#endif -#if !JVET_M0101_HLS - READ_UVLC( uiCode, "sps_seq_parameter_set_id" ); pcSPS->setSPSId( uiCode ); - CHECK(uiCode > 15, "Invalid SPS id signalled"); - - READ_FLAG(uiCode, "intra_only_constraint_flag"); pcSPS->setIntraOnlyConstraintFlag(uiCode > 0 ? true : false); - READ_CODE(4, uiCode, "max_bitdepth_constraint_idc"); pcSPS->setMaxBitDepthConstraintIdc(uiCode); - READ_CODE(2, uiCode, "max_chroma_format_constraint_idc"); pcSPS->setMaxChromaFormatConstraintIdc(uiCode); - READ_FLAG(uiCode, "frame_only_constraint_flag"); pcSPS->setFrameConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_qtbtt_dual_tree_intra_constraint_flag"); pcSPS->setNoQtbttDualTreeIntraConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_sao_constraint_flag"); pcSPS->setNoSaoConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_alf_constraint_flag"); pcSPS->setNoAlfConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_pcm_constraint_flag"); pcSPS->setNoPcmConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_ref_wraparound_constraint_flag"); pcSPS->setNoRefWraparoundConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_temporal_mvp_constraint_flag"); pcSPS->setNoTemporalMvpConstraintFlag(uiCode > 0 ? 
true : false); - READ_FLAG(uiCode, "no_sbtmvp_constraint_flag"); pcSPS->setNoSbtmvpConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_amvr_constraint_flag"); pcSPS->setNoAmvrConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_bdof_constraint_flag"); pcSPS->setNoBdofConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_cclm_constraint_flag"); pcSPS->setNoCclmConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_mts_constraint_flag"); pcSPS->setNoMtsConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_affine_motion_constraint_flag"); pcSPS->setNoAffineMotionConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_gbi_constraint_flag"); pcSPS->setNoGbiConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_mh_intra_constraint_flag"); pcSPS->setNoMhIntraConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_triangle_constraint_flag"); pcSPS->setNoTriangleConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_ladf_constraint_flag"); pcSPS->setNoLadfConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_curr_pic_ref_constraint_flag"); pcSPS->setNoCurrPicRefConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_qp_delta_constraint_flag"); pcSPS->setNoQpDeltaConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_dep_quant_constraint_flag"); pcSPS->setNoDepQuantConstraintFlag(uiCode > 0 ? true : false); - READ_FLAG(uiCode, "no_sign_data_hiding_constraint_flag"); pcSPS->setNoSignDataHidingConstraintFlag(uiCode > 0 ? true : false); - - // KJS: Marakech decision: sub-layers added back - READ_CODE( 3, uiCode, "sps_max_sub_layers_minus1" ); pcSPS->setMaxTLayers ( uiCode+1 ); - CHECK(uiCode > 6, "Invalid maximum number of T-layer signalled"); - READ_FLAG( uiCode, "sps_temporal_id_nesting_flag" ); pcSPS->setTemporalIdNestingFlag ( uiCode > 0 ? 
true : false ); - if ( pcSPS->getMaxTLayers() == 1 ) - { - // sps_temporal_id_nesting_flag must be 1 when sps_max_sub_layers_minus1 is 0 - CHECK( uiCode != 1, "Invalid maximum number of T-layers" ); - } - parsePTL(pcSPS->getPTL(), true, pcSPS->getMaxTLayers() - 1); -#else + READ_CODE( 4, uiCode, "sps_decoding_parameter_set_id"); pcSPS->setDecodingParameterSetId( uiCode ); + READ_CODE( 4, uiCode, "sps_video_parameter_set_id" ); pcSPS->setVPSId( uiCode ); READ_CODE(3, uiCode, "sps_max_sub_layers_minus1"); pcSPS->setMaxTLayers (uiCode + 1); CHECK(uiCode > 6, "Invalid maximum number of T-layer signalled"); READ_CODE(5, uiCode, "sps_reserved_zero_5bits"); @@ -919,55 +1059,100 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) parseProfileTierLevel(pcSPS->getProfileTierLevel(), pcSPS->getMaxTLayers() - 1); - READ_UVLC(uiCode, "sps_seq_parameter_set_id"); pcSPS->setSPSId(uiCode); -#endif + READ_FLAG(uiCode, "gdr_enabled_flag"); + pcSPS->setGDREnabledFlag(uiCode); - READ_UVLC( uiCode, "chroma_format_idc" ); pcSPS->setChromaFormatIdc( ChromaFormat(uiCode) ); - CHECK(uiCode > 3, "Invalid chroma format signalled"); + READ_CODE(4, uiCode, "sps_seq_parameter_set_id"); pcSPS->setSPSId(uiCode); - // KJS: ENABLE_CHROMA_422 does not exist anymore o.O - if( pcSPS->getChromaFormatIdc() == CHROMA_422 ) - { - EXIT( "Error: 4:2:2 chroma sampling format not supported with current compiler setting." 
- "\n Set compiler flag \"ENABLE_CHROMA_422\" equal to 1 for enabling 4:2:2.\n" ); - } + READ_CODE(2, uiCode, "chroma_format_idc"); pcSPS->setChromaFormatIdc( ChromaFormat(uiCode) ); if( pcSPS->getChromaFormatIdc() == CHROMA_444 ) { READ_FLAG( uiCode, "separate_colour_plane_flag"); CHECK(uiCode != 0, "Invalid code"); + pcSPS->setSeparateColourPlaneFlag( uiCode != 0 ); } - READ_UVLC ( uiCode, "pic_width_in_luma_samples" ); pcSPS->setPicWidthInLumaSamples ( uiCode ); - READ_UVLC ( uiCode, "pic_height_in_luma_samples" ); pcSPS->setPicHeightInLumaSamples( uiCode ); + READ_FLAG( uiCode, "ref_pic_resampling_enabled_flag" ); pcSPS->setRprEnabledFlag( uiCode ); + + READ_UVLC( uiCode, "pic_width_max_in_luma_samples" ); pcSPS->setMaxPicWidthInLumaSamples( uiCode ); + READ_UVLC( uiCode, "pic_height_max_in_luma_samples" ); pcSPS->setMaxPicHeightInLumaSamples( uiCode ); + + READ_CODE(2, uiCode, "sps_log2_ctu_size_minus5"); pcSPS->setCTUSize(1 << (uiCode + 5)); + CHECK(uiCode > 2, "sps_log2_ctu_size_minus5 must be less than or equal to 2"); + unsigned ctbLog2SizeY = uiCode + 5; + pcSPS->setMaxCodingDepth(uiCode+3); + pcSPS->setLog2DiffMaxMinCodingBlockSize(uiCode+3); + pcSPS->setMaxCUWidth(pcSPS->getCTUSize()); + pcSPS->setMaxCUHeight(pcSPS->getCTUSize()); + READ_FLAG( uiCode, "subpics_present_flag" ); pcSPS->setSubPicPresentFlag(uiCode); - // KJS: not removing yet - READ_FLAG( uiCode, "conformance_window_flag"); - if (uiCode != 0) + if (pcSPS->getSubPicPresentFlag()) + { + READ_CODE(8, uiCode, "sps_num_subpics_minus1"); pcSPS->setNumSubPics(uiCode + 1); + for (int picIdx = 0; picIdx < pcSPS->getNumSubPics(); picIdx++) + { + READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_ctu_top_left_x[ i ]"); + pcSPS->setSubPicCtuTopLeftX(picIdx, uiCode); + READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), 
uiCode, "subpic_ctu_top_left_y[ i ]"); + pcSPS->setSubPicCtuTopLeftY(picIdx, uiCode); + READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_width_minus1[ i ]"); + pcSPS->setSubPicWidth(picIdx, uiCode + 1); + READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_height_minus1[ i ]"); + pcSPS->setSubPicHeight(picIdx, uiCode + 1); + READ_FLAG(uiCode, "subpic_treated_as_pic_flag[ i ]"); + pcSPS->setSubPicTreatedAsPicFlag(picIdx, uiCode); + READ_FLAG(uiCode, "loop_filter_across_subpic_enabled_flag[ i ]"); + pcSPS->setLoopFilterAcrossSubpicEnabledFlag(picIdx, uiCode); + } + } + READ_FLAG(uiCode, "sps_subpic_id_present_flag"); pcSPS->setSubPicIdPresentFlag( uiCode != 0 ); + if( pcSPS->getSubPicIdPresentFlag() ) + { + READ_FLAG(uiCode, "sps_subpic_id_signalling_present_flag"); pcSPS->setSubPicIdSignallingPresentFlag( uiCode != 0 ); + if( pcSPS->getSubPicIdSignallingPresentFlag() ) + { + READ_UVLC( uiCode, "sps_subpic_id_len_minus1" ); pcSPS->setSubPicIdLen( uiCode + 1 ); + CHECK( uiCode > 15, "Invalid sps_subpic_id_len_minus1 signalled"); + for( int picIdx = 0; picIdx < pcSPS->getNumSubPics( ); picIdx++ ) + { + READ_CODE( pcSPS->getSubPicIdLen( ), uiCode, "sps_subpic_id[i]" ); pcSPS->setSubPicId( picIdx, uiCode ); + } + } + } + if( pcSPS->getSubPicIdPresentFlag() == false || pcSPS->getSubPicIdSignallingPresentFlag() == false ) { - Window &conf = pcSPS->getConformanceWindow(); - READ_UVLC( uiCode, "conf_win_left_offset" ); conf.setWindowLeftOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc() ) ); - READ_UVLC( uiCode, "conf_win_right_offset" ); conf.setWindowRightOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc() ) ); - READ_UVLC( uiCode, "conf_win_top_offset" ); conf.setWindowTopOffset ( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc() ) ); - 
READ_UVLC( uiCode, "conf_win_bottom_offset" ); conf.setWindowBottomOffset( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc() ) ); + for( int picIdx = 0; picIdx < pcSPS->getNumSubPics( ); picIdx++ ) + { + pcSPS->setSubPicId( picIdx, picIdx ); + } } - READ_UVLC( uiCode, "bit_depth_luma_minus8" ); - CHECK(uiCode > 8, "Invalid luma bit depth signalled"); + READ_UVLC( uiCode, "bit_depth_minus8" ); + CHECK(uiCode > 8, "Invalid bit depth signalled"); pcSPS->setBitDepth(CHANNEL_TYPE_LUMA, 8 + uiCode); - + pcSPS->setBitDepth(CHANNEL_TYPE_CHROMA, 8 + uiCode); pcSPS->setQpBDOffset(CHANNEL_TYPE_LUMA, (int) (6*uiCode) ); + pcSPS->setQpBDOffset(CHANNEL_TYPE_CHROMA, (int) (6*uiCode) ); - READ_UVLC( uiCode, "bit_depth_chroma_minus8" ); - CHECK(uiCode > 8, "Invalid chroma bit depth signalled"); - pcSPS->setBitDepth(CHANNEL_TYPE_CHROMA, 8 + uiCode); - pcSPS->setQpBDOffset(CHANNEL_TYPE_CHROMA, (int) (6*uiCode) ); + READ_UVLC( uiCode, "min_qp_prime_ts_minus4" ); + pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA, uiCode); + CHECK(uiCode > 48, "Invalid min_qp_prime_ts_minus4 signalled"); + pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_CHROMA, uiCode); + READ_FLAG( uiCode, "sps_weighted_pred_flag" ); pcSPS->setUseWP( uiCode ? true : false ); + READ_FLAG( uiCode, "sps_weighted_bipred_flag" ); pcSPS->setUseWPBiPred( uiCode ? 
true : false ); - READ_UVLC( uiCode, "log2_max_pic_order_cnt_lsb_minus4" ); pcSPS->setBitsForPOC( 4 + uiCode ); + READ_CODE(4, uiCode, "log2_max_pic_order_cnt_lsb_minus4"); pcSPS->setBitsForPOC( 4 + uiCode ); CHECK(uiCode > 12, "Invalid code"); - // KJS: Marakech decision: sub-layers added back uint32_t subLayerOrderingInfoPresentFlag; - READ_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag"); + if (pcSPS->getMaxTLayers() > 1) + { + READ_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag"); + } + else + { + subLayerOrderingInfoPresentFlag = 0; + } for(uint32_t i=0; i <= pcSPS->getMaxTLayers()-1; i++) { @@ -990,78 +1175,160 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) } } + READ_FLAG(uiCode, "long_term_ref_pics_flag"); pcSPS->setLongTermRefsPresent(uiCode); + READ_FLAG( uiCode, "inter_layer_ref_pics_present_flag" ); pcSPS->setInterLayerPresentFlag( uiCode ); + READ_FLAG( uiCode, "sps_idr_rpl_present_flag" ); pcSPS->setIDRRefParamListPresent( (bool) uiCode ); + READ_FLAG(uiCode, "rpl1_copy_from_rpl0_flag"); + pcSPS->setRPL1CopyFromRPL0Flag(uiCode); + + //Read candidate for List0 + READ_UVLC(uiCode, "num_ref_pic_lists_in_sps[0]"); + uint32_t numberOfRPL = uiCode; + pcSPS->createRPLList0(numberOfRPL); + RPLList* rplList = pcSPS->getRPLList0(); + ReferencePictureList* rpl; + for (uint32_t ii = 0; ii < numberOfRPL; ii++) + { + rpl = rplList->getReferencePictureList(ii); + parseRefPicList(pcSPS, rpl); + } + + //Read candidate for List1 + if (!pcSPS->getRPL1CopyFromRPL0Flag()) + { + READ_UVLC(uiCode, "num_ref_pic_lists_in_sps[1]"); + numberOfRPL = uiCode; + pcSPS->createRPLList1(numberOfRPL); + rplList = pcSPS->getRPLList1(); + for (uint32_t ii = 0; ii < numberOfRPL; ii++) + { + rpl = rplList->getReferencePictureList(ii); + parseRefPicList(pcSPS, rpl); + } + } + else + { + numberOfRPL = pcSPS->getNumRPL0(); + pcSPS->createRPLList1(numberOfRPL); + RPLList* rplListSource = pcSPS->getRPLList0(); + RPLList* 
rplListDest = pcSPS->getRPLList1(); + for (uint32_t ii = 0; ii < numberOfRPL; ii++) + copyRefPicList(pcSPS, rplListSource->getReferencePictureList(ii), rplListDest->getReferencePictureList(ii)); + } + unsigned minQT[3] = { 0, 0, 0 }; unsigned maxBTD[3] = { 0, 0, 0 }; unsigned maxBTSize[3] = { 0, 0, 0 }; unsigned maxTTSize[3] = { 0, 0, 0 }; - READ_FLAG(uiCode, "qtbtt_dual_tree_intra_flag"); pcSPS->setUseDualITree(uiCode); - READ_UVLC(uiCode, "log2_ctu_size_minus2"); pcSPS->setCTUSize(1 << (uiCode + 2)); - pcSPS->setMaxCodingDepth(uiCode); - pcSPS->setLog2DiffMaxMinCodingBlockSize(uiCode); - pcSPS->setMaxCUWidth(pcSPS->getCTUSize()); - pcSPS->setMaxCUHeight(pcSPS->getCTUSize()); + if( pcSPS->getChromaFormatIdc() != CHROMA_400 ) + { + READ_FLAG(uiCode, "qtbtt_dual_tree_intra_flag"); pcSPS->setUseDualITree(uiCode); + } + else + { + pcSPS->setUseDualITree(0); + } READ_UVLC(uiCode, "log2_min_luma_coding_block_size_minus2"); int log2MinCUSize = uiCode + 2; pcSPS->setLog2MinCodingBlockSize(log2MinCUSize); + CHECK(uiCode > ctbLog2SizeY - 2, "Invalid log2_min_luma_coding_block_size_minus2 signalled"); + + CHECK(log2MinCUSize > std::min(6, (int)(ctbLog2SizeY)), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)"); + CHECK( ( pcSPS->getMaxPicWidthInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUWidth() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)" ); + CHECK( ( pcSPS->getMaxPicHeightInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUHeight() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)" ); + READ_FLAG(uiCode, "partition_constraints_override_enabled_flag"); pcSPS->setSplitConsOverrideEnabledFlag(uiCode); - READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_tile_group_luma"); minQT[0] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize()); - READ_UVLC(uiCode, 
"sps_log2_diff_min_qt_min_cb_inter_tile_group"); minQT[1] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize()); - READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_inter_tile_group"); maxBTD[1] = uiCode; - READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_tile_group_luma"); maxBTD[0] = uiCode; + READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); + unsigned minQtLog2SizeIntraY = uiCode + pcSPS->getLog2MinCodingBlockSize(); + minQT[0] = 1 << minQtLog2SizeIntraY; + READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_inter_slice"); + unsigned minQtLog2SizeInterY = uiCode + pcSPS->getLog2MinCodingBlockSize(); + minQT[1] = 1 << minQtLog2SizeInterY; + READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_inter_slice"); maxBTD[1] = uiCode; + CHECK(uiCode > 2*(ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_inter_slice shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); + READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_slice_luma"); maxBTD[0] = uiCode; + CHECK(uiCode > 2 * (ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_intra_slice_luma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); maxTTSize[0] = maxBTSize[0] = minQT[0]; if (maxBTD[0] != 0) { - READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma"); maxBTSize[0] <<= uiCode; - READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma"); maxTTSize[0] <<= uiCode; + READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_slice_luma"); maxBTSize[0] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code"); + READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_slice_luma"); maxTTSize[0] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code"); } maxTTSize[1] = maxBTSize[1] = minQT[1]; if (maxBTD[1] != 0) { - READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_inter_tile_group"); maxBTSize[1] <<= uiCode; - READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_inter_tile_group"); maxTTSize[1] <<= uiCode; + 
READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_inter_slice"); maxBTSize[1] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code"); + READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_inter_slice"); maxTTSize[1] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code"); } if (pcSPS->getUseDualITree()) { - READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma"); minQT[2] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize()); - READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma"); maxBTD[2] = uiCode; + READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); minQT[2] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize()); + READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); maxBTD[2] = uiCode; + CHECK(uiCode > 2 * (ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_intra_slice_chroma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); maxTTSize[2] = maxBTSize[2] = minQT[2]; if (maxBTD[2] != 0) { - READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma"); maxBTSize[2] <<= uiCode; - READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma"); maxTTSize[2] <<= uiCode; + READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); maxBTSize[2] <<= uiCode; + READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); maxTTSize[2] <<= uiCode; } } pcSPS->setMinQTSizes(minQT); - pcSPS->setMaxBTDepth(maxBTD[1], maxBTD[0], maxBTD[2]); + pcSPS->setMaxMTTHierarchyDepth(maxBTD[1], maxBTD[0], maxBTD[2]); pcSPS->setMaxBTSize(maxBTSize[1], maxBTSize[0], maxBTSize[2]); pcSPS->setMaxTTSize(maxTTSize[1], maxTTSize[0], maxTTSize[2]); -#if !JVET_M0101_HLS - if (pcSPS->getPTL()->getGeneralPTL()->getLevelIdc() >= Level::LEVEL5) + + READ_FLAG( uiCode, "sps_max_luma_transform_size_64_flag"); pcSPS->setLog2MaxTbSize( (uiCode ? 
1 : 0) + 5 ); + + READ_FLAG(uiCode, "sps_joint_cbcr_enabled_flag"); pcSPS->setJointCbCrEnabledFlag(uiCode ? true : false); + if (pcSPS->getChromaFormatIdc() != CHROMA_400) { - CHECK(log2MinCUSize + pcSPS->getLog2DiffMaxMinCodingBlockSize() < 5, "Invalid code"); + ChromaQpMappingTableParams chromaQpMappingTableParams; + READ_FLAG(uiCode, "same_qp_table_for_chroma"); chromaQpMappingTableParams.setSameCQPTableForAllChromaFlag(uiCode); + int numQpTables = chromaQpMappingTableParams.getSameCQPTableForAllChromaFlag() ? 1 : (pcSPS->getJointCbCrEnabledFlag() ? 3 : 2); + chromaQpMappingTableParams.setNumQpTables(numQpTables); + for (int i = 0; i < numQpTables; i++) + { + int32_t qpTableStart = 0; + READ_SVLC(qpTableStart, "qp_table_starts_minus26"); chromaQpMappingTableParams.setQpTableStartMinus26(i, qpTableStart); + READ_UVLC(uiCode, "num_points_in_qp_table_minus1"); chromaQpMappingTableParams.setNumPtsInCQPTableMinus1(i,uiCode); + std::vector<int> deltaQpInValMinus1(chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i) + 1); + std::vector<int> deltaQpOutVal(chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i) + 1); + for (int j = 0; j <= chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i); j++) + { + READ_UVLC(uiCode, "delta_qp_in_val_minus1"); deltaQpInValMinus1[j] = uiCode; + READ_UVLC(uiCode, "delta_qp_diff_val"); + deltaQpOutVal[j] = uiCode ^ deltaQpInValMinus1[j]; + } + chromaQpMappingTableParams.setDeltaQpInValMinus1(i, deltaQpInValMinus1); + chromaQpMappingTableParams.setDeltaQpOutVal(i, deltaQpOutVal); + } + pcSPS->setChromaQpMappingTableFromParams(chromaQpMappingTableParams, pcSPS->getQpBDOffset(CHANNEL_TYPE_CHROMA)); + pcSPS->derivedChromaQPMappingTables(); } -#endif -#if MAX_TB_SIZE_SIGNALLING - // KJS: Not in syntax - READ_UVLC( uiCode, "log2_max_luma_transform_block_size_minus2" ); pcSPS->setLog2MaxTbSize( uiCode + 2 ); -#endif + READ_FLAG( uiCode, "sps_sao_enabled_flag" ); pcSPS->setSAOEnabledFlag ( uiCode ? 
true : false ); READ_FLAG( uiCode, "sps_alf_enabled_flag" ); pcSPS->setALFEnabledFlag ( uiCode ? true : false ); - READ_FLAG( uiCode, "sps_pcm_enabled_flag" ); pcSPS->setPCMEnabledFlag( uiCode ? true : false ); - if( pcSPS->getPCMEnabledFlag() ) + READ_FLAG(uiCode, "sps_transform_skip_enabled_flag"); pcSPS->setTransformSkipEnabledFlag(uiCode ? true : false); + if (pcSPS->getTransformSkipEnabledFlag()) { - READ_CODE( 4, uiCode, "pcm_sample_bit_depth_luma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_LUMA, 1 + uiCode ); - READ_CODE( 4, uiCode, "pcm_sample_bit_depth_chroma_minus1" ); pcSPS->setPCMBitDepth ( CHANNEL_TYPE_CHROMA, 1 + uiCode ); - READ_UVLC( uiCode, "log2_min_pcm_luma_coding_block_size_minus3" ); pcSPS->setPCMLog2MinSize ( uiCode+3 ); - READ_UVLC( uiCode, "log2_diff_max_min_pcm_luma_coding_block_size" ); pcSPS->setPCMLog2MaxSize ( uiCode+pcSPS->getPCMLog2MinSize() ); - READ_FLAG( uiCode, "pcm_loop_filter_disable_flag" ); pcSPS->setPCMFilterDisableFlag ( uiCode ? true : false ); + READ_FLAG(uiCode, "sps_bdpcm_enabled_flag"); + if (uiCode && pcSPS->getChromaFormatIdc() == CHROMA_444 ) + { + READ_FLAG(uiCode, "sps_bdpcm_enabled_chroma_flag"); + uiCode++; + } + pcSPS->setBDPCMEnabled(uiCode); } READ_FLAG(uiCode, "sps_ref_wraparound_enabled_flag"); pcSPS->setWrapAroundEnabledFlag( uiCode ? 
true : false ); @@ -1085,47 +1352,91 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) READ_FLAG( uiCode, "sps_amvr_enabled_flag" ); pcSPS->setAMVREnabledFlag ( uiCode != 0 ); READ_FLAG( uiCode, "sps_bdof_enabled_flag" ); pcSPS->setBDOFEnabledFlag ( uiCode != 0 ); - - READ_FLAG( uiCode, "sps_affine_amvr_enabled_flag" ); pcSPS->setAffineAmvrEnabledFlag ( uiCode != 0 ); - - READ_FLAG(uiCode, "sps_dmvr_enable_flag"); pcSPS->setUseDMVR(uiCode != 0); - - // KJS: sps_cclm_enabled_flag - READ_FLAG( uiCode, "lm_chroma_enabled_flag" ); pcSPS->setUseLMChroma ( uiCode != 0 ); - if ( pcSPS->getUseLMChroma() && pcSPS->getChromaFormatIdc() == CHROMA_420 ) + if (pcSPS->getBDOFEnabledFlag()) + { + READ_FLAG(uiCode, "sps_bdof_pic_present_flag"); pcSPS->setBdofControlPresentFlag( uiCode != 0 ); + } + else { + pcSPS->setBdofControlPresentFlag( false ); + } + READ_FLAG(uiCode, "sps_smvd_enabled_flag"); pcSPS->setUseSMVD( uiCode != 0 ); + READ_FLAG(uiCode, "sps_dmvr_enabled_flag"); pcSPS->setUseDMVR(uiCode != 0); + if (pcSPS->getUseDMVR()) { - READ_FLAG( uiCode, "sps_cclm_collocated_chroma_flag" ); pcSPS->setCclmCollocatedChromaFlag( uiCode != 0 ); + READ_FLAG(uiCode, "sps_dmvr_pic_present_flag"); pcSPS->setDmvrControlPresentFlag( uiCode != 0 ); + } + else { + pcSPS->setDmvrControlPresentFlag( false ); + } + READ_FLAG(uiCode, "sps_mmvd_enabled_flag"); pcSPS->setUseMMVD(uiCode != 0); + READ_FLAG(uiCode, "sps_isp_enabled_flag"); pcSPS->setUseISP( uiCode != 0 ); + READ_FLAG(uiCode, "sps_mrl_enabled_flag"); pcSPS->setUseMRL( uiCode != 0 ); + READ_FLAG(uiCode, "sps_mip_enabled_flag"); pcSPS->setUseMIP( uiCode != 0 ); + if( pcSPS->getChromaFormatIdc() != CHROMA_400) + { + READ_FLAG( uiCode, "sps_cclm_enabled_flag" ); pcSPS->setUseLMChroma( uiCode != 0 ); + } + else + { + pcSPS->setUseLMChroma(0); + } + if( pcSPS->getChromaFormatIdc() == CHROMA_420 ) + { + READ_FLAG( uiCode, "sps_chroma_horizontal_collocated_flag" ); pcSPS->setHorCollocatedChromaFlag( uiCode != 0 ); + READ_FLAG( uiCode, 
"sps_chroma_vertical_collocated_flag" ); pcSPS->setVerCollocatedChromaFlag( uiCode != 0 ); } - READ_FLAG( uiCode, "mts_enabled_flag" ); pcSPS->setUseMTS ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_mts_enabled_flag" ); pcSPS->setUseMTS ( uiCode != 0 ); if ( pcSPS->getUseMTS() ) { - READ_FLAG( uiCode, "mts_intra_enabled_flag" ); pcSPS->setUseIntraMTS ( uiCode != 0 ); - READ_FLAG( uiCode, "mts_inter_enabled_flag" ); pcSPS->setUseInterMTS ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_explicit_mts_intra_enabled_flag" ); pcSPS->setUseIntraMTS ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_explicit_mts_inter_enabled_flag" ); pcSPS->setUseInterMTS ( uiCode != 0 ); } - // KJS: sps_affine_enabled_flag - READ_FLAG( uiCode, "affine_flag" ); pcSPS->setUseAffine ( uiCode != 0 ); + READ_FLAG(uiCode, "sps_sbt_enabled_flag"); pcSPS->setUseSBT ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_affine_enabled_flag" ); pcSPS->setUseAffine ( uiCode != 0 ); if ( pcSPS->getUseAffine() ) { - READ_FLAG( uiCode, "affine_type_flag" ); pcSPS->setUseAffineType ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_affine_type_flag" ); pcSPS->setUseAffineType ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_affine_amvr_enabled_flag" ); pcSPS->setAffineAmvrEnabledFlag ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_affine_prof_enabled_flag" ); pcSPS->setUsePROF ( uiCode != 0 ); + if (pcSPS->getUsePROF()) + { + READ_FLAG(uiCode, "sps_prof_pic_present_flag"); pcSPS->setProfControlPresentFlag ( uiCode != 0 ); + } + else { + pcSPS->setProfControlPresentFlag( false ); + } + } + if (pcSPS->getChromaFormatIdc() == CHROMA_444) + { + READ_FLAG(uiCode, "sps_act_enabled_flag"); pcSPS->setUseColorTrans(uiCode != 0); + } + else + { + pcSPS->setUseColorTrans(false); } - READ_FLAG( uiCode, "gbi_flag" ); pcSPS->setUseGBi ( uiCode != 0 ); - READ_FLAG(uiCode, "ibc_flag"); pcSPS->setIBCFlag(uiCode); + if (pcSPS->getChromaFormatIdc() == CHROMA_444) + { + READ_FLAG( uiCode, "sps_palette_enabled_flag"); pcSPS->setPLTMode ( uiCode != 0 ); + } + else + { + 
pcSPS->setPLTMode(false); + } + READ_FLAG( uiCode, "sps_bcw_enabled_flag" ); pcSPS->setUseBcw( uiCode != 0 ); + READ_FLAG(uiCode, "sps_ibc_enabled_flag"); pcSPS->setIBCFlag(uiCode); // KJS: sps_ciip_enabled_flag - READ_FLAG( uiCode, "mhintra_flag" ); pcSPS->setUseMHIntra ( uiCode != 0 ); - - READ_FLAG( uiCode, "triangle_flag" ); pcSPS->setUseTriangle ( uiCode != 0 ); + READ_FLAG( uiCode, "sps_ciip_enabled_flag" ); pcSPS->setUseCiip ( uiCode != 0 ); - // KJS: not in draft yet - READ_FLAG( uiCode, "sps_fracmmvd_disabled_flag" ); pcSPS->setDisFracMmvdEnabledFlag ( uiCode != 0 ); - // KJS: not in draft yet - READ_FLAG(uiCode, "sbt_enable_flag"); pcSPS->setUseSBT(uiCode != 0); - if( pcSPS->getUseSBT() ) + if ( pcSPS->getUseMMVD() ) { - READ_FLAG(uiCode, "max_sbt_size_64_flag"); pcSPS->setMaxSbtSize(uiCode != 0 ? 64 : 32); + READ_FLAG( uiCode, "sps_fpel_mmvd_enabled_flag" ); pcSPS->setFpelMmvdEnabledFlag ( uiCode != 0 ); } - // KJS: not in draft yet - READ_FLAG(uiCode, "sps_reshaper_enable_flag"); pcSPS->setUseReshaper(uiCode == 1); + + READ_FLAG( uiCode, "triangle_flag" ); pcSPS->setUseTriangle ( uiCode != 0 ); + + READ_FLAG(uiCode, "sps_lmcs_enable_flag"); pcSPS->setUseLmcs(uiCode == 1); + READ_FLAG( uiCode, "sps_lfnst_enabled_flag" ); pcSPS->setUseLFNST( uiCode != 0 ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET READ_FLAG( uiCode, "sps_ladf_enabled_flag" ); pcSPS->setLadfEnabled( uiCode != 0 ); @@ -1144,51 +1455,50 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) #endif // KJS: reference picture sets to be replaced - READ_UVLC( uiCode, "num_short_term_ref_pic_sets" ); - CHECK(uiCode > 64, "Invalid code"); - pcSPS->createRPSList(uiCode); - RPSList* rpsList = pcSPS->getRPSList(); - ReferencePictureSet* rps; + // KJS: not found in draft -> does not exist - for(uint32_t i=0; i< rpsList->getNumberOfReferencePictureSets(); i++) - { - rps = rpsList->getReferencePictureSet(i); - parseShortTermRefPicSet(pcSPS,rps,i); - } - READ_FLAG( uiCode, "long_term_ref_pics_present_flag" ); 
pcSPS->setLongTermRefsPresent(uiCode); - if (pcSPS->getLongTermRefsPresent()) + // KJS: remove scaling lists? + READ_FLAG( uiCode, "scaling_list_enabled_flag" ); pcSPS->setScalingListFlag ( uiCode ); + + READ_FLAG( uiCode, "sps_loop_filter_across_virtual_boundaries_disabled_present_flag" ); pcSPS->setLoopFilterAcrossVirtualBoundariesDisabledFlag( uiCode != 0 ); + if( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { - READ_UVLC( uiCode, "num_long_term_ref_pics_sps" ); - pcSPS->setNumLongTermRefPicSPS(uiCode); - for (uint32_t k = 0; k < pcSPS->getNumLongTermRefPicSPS(); k++) + READ_CODE( 2, uiCode, "sps_num_ver_virtual_boundaries"); pcSPS->setNumVerVirtualBoundaries( uiCode ); + for( unsigned i = 0; i < pcSPS->getNumVerVirtualBoundaries(); i++ ) { - READ_CODE( pcSPS->getBitsForPOC(), uiCode, "lt_ref_pic_poc_lsb_sps" ); - pcSPS->setLtRefPicPocLsbSps(k, uiCode); - READ_FLAG( uiCode, "used_by_curr_pic_lt_sps_flag[i]"); - pcSPS->setUsedByCurrPicLtSPSFlag(k, uiCode?1:0); + READ_CODE(13, uiCode, "sps_virtual_boundaries_pos_x"); pcSPS->setVirtualBoundariesPosX(uiCode << 3, i); + } + READ_CODE( 2, uiCode, "sps_num_hor_virtual_boundaries"); pcSPS->setNumHorVirtualBoundaries( uiCode ); + for( unsigned i = 0; i < pcSPS->getNumHorVirtualBoundaries(); i++ ) + { + READ_CODE(13, uiCode, "sps_virtual_boundaries_pos_y"); pcSPS->setVirtualBoundariesPosY(uiCode << 3, i); } } + else + { + pcSPS->setNumVerVirtualBoundaries( 0 ); + pcSPS->setNumHorVirtualBoundaries( 0 ); + } - // KJS: not found in draft -> does not exist -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - READ_FLAG( uiCode, "strong_intra_smoothing_enable_flag" ); pcSPS->setUseStrongIntraSmoothing(uiCode); -#endif - - // KJS: remove scaling lists? 
-#if HEVC_USE_SCALING_LISTS - READ_FLAG( uiCode, "scaling_list_enabled_flag" ); pcSPS->setScalingListFlag ( uiCode ); - if(pcSPS->getScalingListFlag()) + TimingInfo *timingInfo = pcSPS->getTimingInfo(); + READ_FLAG( uiCode, "general_hrd_parameters_present_flag"); pcSPS->setHrdParametersPresentFlag(uiCode); + if( pcSPS->getHrdParametersPresentFlag() ) { - READ_FLAG( uiCode, "sps_scaling_list_data_present_flag" ); pcSPS->setScalingListPresentFlag ( uiCode ); - if(pcSPS->getScalingListPresentFlag ()) + READ_CODE( 32, uiCode, "num_units_in_tick"); timingInfo->setNumUnitsInTick (uiCode); + READ_CODE( 32, uiCode, "time_scale"); timingInfo->setTimeScale (uiCode); + + READ_FLAG( uiCode, "sub_layer_cpb_parameters_present_flag"); pcSPS->setSubLayerParametersPresentFlag(uiCode); + if (pcSPS->getSubLayerParametersPresentFlag()) + { + parseHrdParameters(pcSPS->getHrdParameters(), 0, pcSPS->getMaxTLayers() - 1); + } + else { - parseScalingList( &(pcSPS->getScalingList()) ); + parseHrdParameters(pcSPS->getHrdParameters(), pcSPS->getMaxTLayers() - 1, pcSPS->getMaxTLayers() - 1); } } -#endif - // KJS: no VUI defined yet READ_FLAG( uiCode, "vui_parameters_present_flag" ); pcSPS->setVuiParametersPresentFlag(uiCode); if (pcSPS->getVuiParametersPresentFlag()) @@ -1258,517 +1568,1207 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) xReadRbspTrailingBits(); } -#if HEVC_VPS -void HLSyntaxReader::parseVPS(VPS* pcVPS) +void HLSyntaxReader::parseDPS(DPS* dps) { #if ENABLE_TRACING - xTraceVPSHeader (); + xTraceDPSHeader (); #endif - uint32_t uiCode; + uint32_t symbol; - READ_CODE( 4, uiCode, "vps_video_parameter_set_id" ); pcVPS->setVPSId( uiCode ); - READ_FLAG( uiCode, "vps_base_layer_internal_flag" ); CHECK(uiCode != 1, "Invalid code"); - READ_FLAG( uiCode, "vps_base_layer_available_flag" ); CHECK(uiCode != 1, "Invalid code"); - READ_CODE( 6, uiCode, "vps_max_layers_minus1" ); - READ_CODE( 3, uiCode, "vps_max_sub_layers_minus1" ); pcVPS->setMaxTLayers( uiCode + 1 ); CHECK(uiCode+1 > MAX_TLAYER, 
"Invalid code"); - READ_FLAG( uiCode, "vps_temporal_id_nesting_flag" ); pcVPS->setTemporalNestingFlag( uiCode ? true:false ); - CHECK (pcVPS->getMaxTLayers()<=1&&!pcVPS->getTemporalNestingFlag(), "Invalid VPS state"); - READ_CODE( 16, uiCode, "vps_reserved_0xffff_16bits" ); CHECK(uiCode != 0xffff, "Invalid value for reserved bits"); - parsePTL ( pcVPS->getPTL(), true, pcVPS->getMaxTLayers()-1); - uint32_t subLayerOrderingInfoPresentFlag; - READ_FLAG(subLayerOrderingInfoPresentFlag, "vps_sub_layer_ordering_info_present_flag"); - for(uint32_t i = 0; i <= pcVPS->getMaxTLayers()-1; i++) + READ_CODE( 4, symbol, "dps_decoding_parameter_set_id" ); + CHECK(symbol == 0, "dps_decoding_parameter_set_id equal to zero is reserved and should not be use in a bitstream"); + dps->setDecodingParameterSetId( symbol ); + + READ_CODE( 3, symbol, "dps_max_sub_layers_minus1" ); dps->setMaxSubLayersMinus1( symbol ); + READ_CODE( 5, symbol, "dps_reserved_zero_5bits" ); CHECK(symbol != 0, "dps_reserved_zero_5bits must be equal to zero"); + + uint32_t numPTLs; + READ_CODE( 4, numPTLs, "dps_num_ptls_minus1" ); + numPTLs += 1; + + std::vector<ProfileTierLevel> ptls; + ptls.resize(numPTLs); + for (int i=0; i<numPTLs; i++) { - READ_UVLC( uiCode, "vps_max_dec_pic_buffering_minus1[i]" ); pcVPS->setMaxDecPicBuffering( uiCode + 1, i ); - READ_UVLC( uiCode, "vps_max_num_reorder_pics[i]" ); pcVPS->setNumReorderPics( uiCode, i ); - READ_UVLC( uiCode, "vps_max_latency_increase_plus1[i]" ); pcVPS->setMaxLatencyIncrease( uiCode, i ); + parseProfileTierLevel(&ptls[i], dps->getMaxSubLayersMinus1()); + } + dps->setProfileTierLevel(ptls); - if (!subLayerOrderingInfoPresentFlag) + READ_FLAG( symbol, "dps_extension_flag" ); + if (symbol) + { + while ( xMoreRbspData() ) { - for (i++; i <= pcVPS->getMaxTLayers()-1; i++) - { - pcVPS->setMaxDecPicBuffering(pcVPS->getMaxDecPicBuffering(0), i); - pcVPS->setNumReorderPics(pcVPS->getNumReorderPics(0), i); - pcVPS->setMaxLatencyIncrease(pcVPS->getMaxLatencyIncrease(0), 
i); - } - break; + READ_FLAG( symbol, "dps_extension_data_flag"); } } + xReadRbspTrailingBits(); +} + +void HLSyntaxReader::parseVPS(VPS* pcVPS) +{ +#if ENABLE_TRACING + xTraceVPSHeader(); +#endif + uint32_t uiCode; - CHECK( pcVPS->getNumHrdParameters() >= MAX_VPS_OP_SETS_PLUS1, "Too many HDR parameters" ); - CHECK( pcVPS->getMaxNuhReservedZeroLayerId() >= MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1, "Reserved zero layer id too big" ); - READ_CODE( 6, uiCode, "vps_max_layer_id" ); pcVPS->setMaxNuhReservedZeroLayerId( uiCode ); - READ_UVLC( uiCode, "vps_num_layer_sets_minus1" ); pcVPS->setMaxOpSets( uiCode + 1 ); - for( uint32_t opsIdx = 1; opsIdx <= ( pcVPS->getMaxOpSets() - 1 ); opsIdx ++ ) + READ_CODE(4, uiCode, "vps_video_parameter_set_id"); + CHECK( uiCode == 0, "vps_video_parameter_set_id equal to zero is reserved and shall not be used in a bitstream" ); + pcVPS->setVPSId(uiCode); + + READ_CODE(6, uiCode, "vps_max_layers_minus1"); pcVPS->setMaxLayers(uiCode + 1); CHECK(uiCode + 1 > MAX_VPS_LAYERS, "Invalid code"); + if (pcVPS->getMaxLayers() - 1 == 0) + { + pcVPS->setEachLayerIsAnOlsFlag(1); + } + READ_CODE(3, uiCode, "vps_max_sublayers_minus1"); pcVPS->setMaxSubLayers(uiCode + 1); CHECK(uiCode + 1 > MAX_VPS_SUBLAYERS, "Invalid code"); + if( pcVPS->getMaxLayers() > 1 && pcVPS->getMaxSubLayers() > 1) + { + READ_FLAG(uiCode, "vps_all_layers_same_num_sublayers_flag"); pcVPS->setAllLayersSameNumSublayersFlag(uiCode); + } + else + { + pcVPS->setAllLayersSameNumSublayersFlag(1); + } + if( pcVPS->getMaxLayers() > 1 ) { - // Operation point set - for( uint32_t i = 0; i <= pcVPS->getMaxNuhReservedZeroLayerId(); i ++ ) + READ_FLAG(uiCode, "vps_all_independent_layers_flag"); pcVPS->setAllIndependentLayersFlag(uiCode); + if (pcVPS->getAllIndependentLayersFlag() == 0) { - READ_FLAG( uiCode, "layer_id_included_flag[opsIdx][i]" ); pcVPS->setLayerIdIncludedFlag( uiCode == 1 ? 
true : false, opsIdx, i ); + pcVPS->setEachLayerIsAnOlsFlag(0); } } - - TimingInfo *timingInfo = pcVPS->getTimingInfo(); - READ_FLAG( uiCode, "vps_timing_info_present_flag"); timingInfo->setTimingInfoPresentFlag (uiCode ? true : false); - if(timingInfo->getTimingInfoPresentFlag()) + for (uint32_t i = 0; i < pcVPS->getMaxLayers(); i++) { - READ_CODE( 32, uiCode, "vps_num_units_in_tick"); timingInfo->setNumUnitsInTick (uiCode); - READ_CODE( 32, uiCode, "vps_time_scale"); timingInfo->setTimeScale (uiCode); - READ_FLAG( uiCode, "vps_poc_proportional_to_timing_flag"); timingInfo->setPocProportionalToTimingFlag(uiCode ? true : false); - if(timingInfo->getPocProportionalToTimingFlag()) + READ_CODE(6, uiCode, "vps_layer_id"); pcVPS->setLayerId(i, uiCode); + pcVPS->setGeneralLayerIdx(uiCode, i); + + if (i > 0 && !pcVPS->getAllIndependentLayersFlag()) { - READ_UVLC( uiCode, "vps_num_ticks_poc_diff_one_minus1"); timingInfo->setNumTicksPocDiffOneMinus1 (uiCode); + READ_FLAG(uiCode, "vps_independent_layer_flag"); pcVPS->setIndependentLayerFlag(i, uiCode); + if (!pcVPS->getIndependentLayerFlag(i)) + { + uint16_t sumUiCode = 0; + for (int j = 0, k = 0; j < i; j++) + { + READ_FLAG(uiCode, "vps_direct_dependency_flag"); pcVPS->setDirectRefLayerFlag(i, j, uiCode); + if( uiCode ) + { + pcVPS->setInterLayerRefIdc( i, j, k ); + pcVPS->setDirectRefLayerIdx( i, k++, j ); + sumUiCode++; + } + } + CHECK(sumUiCode == 0, "There has to be at least one value of j such that the value of vps_direct_dependency_flag[ i ][ j ] is equal to 1,when vps_independent_layer_flag[ i ] is equal to 0 "); + } } + } - READ_UVLC( uiCode, "vps_num_hrd_parameters" ); pcVPS->setNumHrdParameters( uiCode ); - - if( pcVPS->getNumHrdParameters() > 0 ) + if (pcVPS->getMaxLayers() > 1) + { + if (pcVPS->getAllIndependentLayersFlag()) { - pcVPS->createHrdParamBuffer(); + READ_FLAG(uiCode, "vps_each_layer_is_an_ols_flag"); pcVPS->setEachLayerIsAnOlsFlag(uiCode); + if (pcVPS->getEachLayerIsAnOlsFlag() == 0) + { + 
pcVPS->setOlsModeIdc(2); + } } - for( uint32_t i = 0; i < pcVPS->getNumHrdParameters(); i ++ ) + if (!pcVPS->getEachLayerIsAnOlsFlag()) { - READ_UVLC( uiCode, "hrd_layer_set_idx[i]" ); pcVPS->setHrdOpSetIdx( uiCode, i ); - if( i > 0 ) + if (!pcVPS->getAllIndependentLayersFlag()) { - READ_FLAG( uiCode, "cprms_present_flag[i]" ); pcVPS->setCprmsPresentFlag( uiCode == 1 ? true : false, i ); + READ_CODE(2, uiCode, "vps_ols_mode_idc"); pcVPS->setOlsModeIdc(uiCode); CHECK(uiCode > MAX_VPS_OLS_MODE_IDC, "Invalid code"); } - else + if (pcVPS->getOlsModeIdc() == 2) { - pcVPS->setCprmsPresentFlag( true, i ); + READ_CODE(8, uiCode, "num_output_layer_sets_minus1"); pcVPS->setNumOutputLayerSets(uiCode + 1); + for (uint32_t i = 1; i <= pcVPS->getNumOutputLayerSets() - 1; i++) + { + for (uint32_t j = 0; j < pcVPS->getMaxLayers(); j++) + { + READ_FLAG(uiCode, "vps_ols_output_layer_flag"); pcVPS->setOlsOutputLayerFlag(i, j, uiCode); + } + } } - - parseHrdParameters(pcVPS->getHrdParameters(i), pcVPS->getCprmsPresentFlag( i ), pcVPS->getMaxTLayers() - 1); } } - - READ_FLAG( uiCode, "vps_extension_flag" ); + READ_FLAG(uiCode, "vps_extension_flag"); if (uiCode) { - while ( xMoreRbspData() ) + while (xMoreRbspData()) { - READ_FLAG( uiCode, "vps_extension_data_flag"); + READ_FLAG(uiCode, "vps_extension_data_flag"); } } xReadRbspTrailingBits(); } -#endif -void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *parameterSetManager, const int prevTid0POC) +void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManager *parameterSetManager ) { - uint32_t uiCode; - int iCode; - + uint32_t uiCode; + int iCode; + PPS* pps = NULL; + SPS* sps = NULL; + #if ENABLE_TRACING - xTraceSliceHeader(); + xTracePictureHeader(); #endif - PPS* pps = NULL; - SPS* sps = NULL; - uint32_t firstSliceSegmentInPic; - READ_FLAG( firstSliceSegmentInPic, "first_slice_segment_in_pic_flag" ); - if( pcSlice->getRapPicFlag()) + READ_FLAG(uiCode, "non_reference_picture_flag"); 
picHeader->setNonReferencePictureFlag( uiCode != 0 ); + READ_FLAG(uiCode, "gdr_pic_flag"); picHeader->setGdrPicFlag( uiCode != 0 ); + READ_FLAG(uiCode, "no_output_of_prior_pics_flag"); picHeader->setNoOutputOfPriorPicsFlag( uiCode != 0 ); + if( picHeader->getGdrPicFlag() ) { - READ_FLAG( uiCode, "no_output_of_prior_pics_flag" ); //ignored -- updated already - pcSlice->setNoOutputPriorPicsFlag(uiCode ? true : false); + READ_UVLC(uiCode, "recovery_poc_cnt"); picHeader->setRecoveryPocCnt( uiCode ); } - READ_UVLC ( uiCode, "slice_pic_parameter_set_id" ); pcSlice->setPPSId(uiCode); - pps = parameterSetManager->getPPS(uiCode); - //!KS: need to add error handling code here, if PPS is not available - CHECK(pps==0, "Invalid PPS"); - sps = parameterSetManager->getSPS(pps->getSPSId()); - //!KS: need to add error handling code here, if SPS is not available - CHECK(sps==0, "Invalid SPS"); - - const ChromaFormat chFmt = sps->getChromaFormatIdc(); - const uint32_t numValidComp=getNumberValidComponents(chFmt); - const bool bChroma=(chFmt!=CHROMA_400); - -#if HEVC_DEPENDENT_SLICES - if( pps->getDependentSliceSegmentsEnabledFlag() && ( !firstSliceSegmentInPic )) + else { - READ_FLAG( uiCode, "dependent_slice_segment_flag" ); pcSlice->setDependentSliceSegmentFlag(uiCode ? 
true : false); + picHeader->setRecoveryPocCnt( 0 ); } - else + + // parameter sets + READ_UVLC(uiCode, "ph_pic_parameter_set_id"); + picHeader->setPPSId( uiCode ); + pps = parameterSetManager->getPPS(picHeader->getPPSId()); + CHECK(pps==0, "Invalid PPS"); + picHeader->setSPSId( pps->getSPSId() ); + sps = parameterSetManager->getSPS(picHeader->getSPSId()); + CHECK(sps==0, "Invalid SPS"); + + // initialize tile/slice info for no partitioning case + if( pps->getNoPicPartitionFlag() ) { - pcSlice->setDependentSliceSegmentFlag(false); + pps->resetTileSliceInfo(); + pps->setLog2CtuSize( ceilLog2(sps->getCTUSize()) ); + pps->setNumExpTileColumns(1); + pps->setNumExpTileRows(1); + pps->addTileColumnWidth( pps->getPicWidthInCtu( ) ); + pps->addTileRowHeight( pps->getPicHeightInCtu( ) ); + pps->initTiles(); + pps->setRectSliceFlag( 1 ); + pps->setNumSlicesInPic( 1 ); + pps->initRectSlices( ); + pps->setTileIdxDeltaPresentFlag( 0 ); + pps->setSliceTileIdx( 0, 0 ); + pps->initRectSliceMap( ); } -#endif - int numCTUs = ((sps->getPicWidthInLumaSamples()+sps->getMaxCUWidth()-1)/sps->getMaxCUWidth())*((sps->getPicHeightInLumaSamples()+sps->getMaxCUHeight()-1)/sps->getMaxCUHeight()); - uint32_t sliceSegmentAddress = 0; - int bitsSliceSegmentAddress = 0; - while(numCTUs>(1<<bitsSliceSegmentAddress)) + else { - bitsSliceSegmentAddress++; + CHECK(pps->getCtuSize() != sps->getCTUSize(), "PPS CTU size does not match CTU size in SPS"); } - if(!firstSliceSegmentInPic) + // sub-picture IDs + if( sps->getSubPicIdPresentFlag() ) { - READ_CODE( bitsSliceSegmentAddress, sliceSegmentAddress, "slice_segment_address" ); + if( sps->getSubPicIdSignallingPresentFlag() ) + { + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, sps->getSubPicId( picIdx ) ); + } + } + else + { + READ_FLAG(uiCode, "ph_subpic_id_signalling_present_flag"); picHeader->setSubPicIdSignallingPresentFlag( uiCode != 0 ); + if( picHeader->getSubPicIdSignallingPresentFlag() ) + { 
+ READ_UVLC( uiCode, "ph_subpic_id_len_minus1" ); picHeader->setSubPicIdLen( uiCode + 1 ); + CHECK( uiCode > 15, "Invalid ph_subpic_id_len_minus1 signalled"); + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + READ_CODE( picHeader->getSubPicIdLen( ), uiCode, "ph_subpic_id[i]" ); picHeader->setSubPicId( picIdx, uiCode ); + } + } + else + { + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, pps->getSubPicId( picIdx ) ); + } + } + } } - //set uiCode to equal slice start address (or dependent slice start address) -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentCurStartCtuTsAddr( sliceSegmentAddress );// this is actually a Raster-Scan (RS) address, but we do not have the RS->TS conversion table defined yet. - pcSlice->setSliceSegmentCurEndCtuTsAddr(numCTUs); // Set end as the last CTU of the picture. - - if (!pcSlice->getDependentSliceSegmentFlag()) + else { -#endif - pcSlice->setSliceCurStartCtuTsAddr(sliceSegmentAddress); // this is actually a Raster-Scan (RS) address, but we do not have the RS->TS conversion table defined yet. 
- pcSlice->setSliceCurEndCtuTsAddr(numCTUs); -#if HEVC_DEPENDENT_SLICES + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, picIdx ); + } } - if(!pcSlice->getDependentSliceSegmentFlag()) + // virtual boundaries + if( !sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { -#endif - for (int i = 0; i < pps->getNumExtraSliceHeaderBits(); i++) + READ_FLAG( uiCode, "ph_loop_filter_across_virtual_boundaries_disabled_present_flag" ); picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( uiCode != 0 ); + if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { - READ_FLAG(uiCode, "slice_reserved_flag[]"); // ignored + READ_CODE( 2, uiCode, "ph_num_ver_virtual_boundaries"); picHeader->setNumVerVirtualBoundaries( uiCode ); + for( unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ ) + { + READ_CODE(13, uiCode, "ph_virtual_boundaries_pos_x"); picHeader->setVirtualBoundariesPosX(uiCode << 3, i); + } + READ_CODE( 2, uiCode, "ph_num_hor_virtual_boundaries"); picHeader->setNumHorVirtualBoundaries( uiCode ); + for( unsigned i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ ) + { + READ_CODE(13, uiCode, "ph_virtual_boundaries_pos_y"); picHeader->setVirtualBoundariesPosY(uiCode << 3, i); + } } - - READ_UVLC ( uiCode, "slice_type" ); pcSlice->setSliceType((SliceType)uiCode); - if( pps->getOutputFlagPresentFlag() ) + else { - READ_FLAG( uiCode, "pic_output_flag" ); pcSlice->setPicOutputFlag( uiCode ? 
true : false ); + picHeader->setNumVerVirtualBoundaries( 0 ); + picHeader->setNumHorVirtualBoundaries( 0 ); } - else + } + else + { + picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ); + picHeader->setNumVerVirtualBoundaries( sps->getNumVerVirtualBoundaries() ); + picHeader->setNumHorVirtualBoundaries( sps->getNumHorVirtualBoundaries() ); + for( unsigned i = 0; i < 3; i++ ) { - pcSlice->setPicOutputFlag( true ); + picHeader->setVirtualBoundariesPosX( sps->getVirtualBoundariesPosX(i), i ); + picHeader->setVirtualBoundariesPosY( sps->getVirtualBoundariesPosY(i), i ); } + } + + // 4:4:4 colour plane ID + if( sps->getSeparateColourPlaneFlag() ) + { + READ_CODE( 2, uiCode, "colour_plane_id" ); picHeader->setColourPlaneId( uiCode ); + CHECK(uiCode > 2, "colour_plane_id exceeds valid range"); + } + else + { + picHeader->setColourPlaneId( 0 ); + } - // if (separate_colour_plane_flag == 1) - // read colour_plane_id - // (separate_colour_plane_flag == 1) is not supported in this version of the standard. 
+ // picture output flag + if( pps->getOutputFlagPresentFlag() ) + { + READ_FLAG( uiCode, "pic_output_flag" ); picHeader->setPicOutputFlag( uiCode != 0 ); + } + else + { + picHeader->setPicOutputFlag( true ); + } - if( pcSlice->getIdrPicFlag() ) - { - READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb"); - pcSlice->setPOC(uiCode); - ReferencePictureSet* rps = pcSlice->getLocalRPS(); - (*rps)=ReferencePictureSet(); - pcSlice->setRPS(rps); - } - else - { - READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb"); - int iPOClsb = uiCode; - int iPrevPOC = prevTid0POC; - int iMaxPOClsb = 1<< sps->getBitsForPOC(); - int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1); - int iPrevPOCmsb = iPrevPOC-iPrevPOClsb; - int iPOCmsb; - if( ( iPOClsb < iPrevPOClsb ) && ( ( iPrevPOClsb - iPOClsb ) >= ( iMaxPOClsb / 2 ) ) ) + // reference picture lists + READ_FLAG( uiCode, "pic_rpl_present_flag" ); picHeader->setPicRplPresentFlag( uiCode != 0 ); + if( picHeader->getPicRplPresentFlag() ) + { + // List0 and List1 + for(int listIdx = 0; listIdx < 2; listIdx++) + { + // copy L1 index from L0 index + if (listIdx == 1 && !pps->getRpl1IdxPresentFlag()) { - iPOCmsb = iPrevPOCmsb + iMaxPOClsb; + picHeader->setRPL1idx(picHeader->getRPL0idx()); + uiCode = (picHeader->getRPL0idx() != -1); } - else if( (iPOClsb > iPrevPOClsb ) && ( (iPOClsb - iPrevPOClsb ) > ( iMaxPOClsb / 2 ) ) ) + // RPL in picture header or SPS + else if (sps->getNumRPL( listIdx ) == 0) { - iPOCmsb = iPrevPOCmsb - iMaxPOClsb; + uiCode = 0; } - else + else if (!pps->getPPSRefPicListSPSIdc( listIdx )) { - iPOCmsb = iPrevPOCmsb; + READ_FLAG(uiCode, "pic_rpl_sps_flag[i]"); } -#if !JVET_M0101_HLS - if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP ) + else { - // For BLA picture types, POCmsb is set to 0. 
- iPOCmsb = 0; + uiCode = pps->getPPSRefPicListSPSIdc( listIdx ) - 1; } -#endif - pcSlice->setPOC (iPOCmsb+iPOClsb); - - ReferencePictureSet* rps; - rps = pcSlice->getLocalRPS(); - (*rps)=ReferencePictureSet(); - pcSlice->setRPS(rps); - READ_FLAG( uiCode, "short_term_ref_pic_set_sps_flag" ); - if(uiCode == 0) // use short-term reference picture set explicitly signalled in slice header + // explicit RPL in picture header + if (!uiCode) { - parseShortTermRefPicSet(sps,rps, sps->getRPSList()->getNumberOfReferencePictureSets()); + ReferencePictureList* rpl = picHeader->getLocalRPL( listIdx ); + (*rpl) = ReferencePictureList(); + parseRefPicList(sps, rpl); + picHeader->setRPLIdx(listIdx, -1); + picHeader->setRPL(listIdx, rpl); } - else // use reference to short-term reference picture set in PPS - { - int numBits = 0; - while ((1 << numBits) < sps->getRPSList()->getNumberOfReferencePictureSets()) + // use list from SPS + else + { + if (listIdx == 1 && !pps->getRpl1IdxPresentFlag()) { - numBits++; + picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(picHeader->getRPLIdx( listIdx ))); } - if (numBits > 0) + else if (sps->getNumRPL( listIdx ) > 1) { - READ_CODE( numBits, uiCode, "short_term_ref_pic_set_idx"); + int numBits = ceilLog2(sps->getNumRPL( listIdx )); + READ_CODE(numBits, uiCode, "pic_rpl_idx[i]"); + picHeader->setRPLIdx( listIdx, uiCode ); + picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(uiCode)); } else { - uiCode = 0; - + picHeader->setRPLIdx( listIdx, 0 ); + picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(0)); } - *rps = *(sps->getRPSList()->getReferencePictureSet(uiCode)); } - if(sps->getLongTermRefsPresent()) + + // POC MSB cycle signalling for LTRP + for (int i = 0; i < picHeader->getRPL( listIdx )->getNumberOfLongtermPictures() + picHeader->getRPL( listIdx )->getNumberOfShorttermPictures(); i++) + { + picHeader->getLocalRPL( listIdx )->setDeltaPocMSBPresentFlag(i, 
false); + picHeader->getLocalRPL( listIdx )->setDeltaPocMSBCycleLT(i, 0); + } + if (picHeader->getRPL( listIdx )->getNumberOfLongtermPictures()) { - int offset = rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures(); - uint32_t numOfLtrp = 0; - uint32_t numLtrpInSPS = 0; - if (sps->getNumLongTermRefPicSPS() > 0) + for (int i = 0; i < picHeader->getRPL( listIdx )->getNumberOfLongtermPictures() + picHeader->getRPL( listIdx )->getNumberOfShorttermPictures(); i++) { - READ_UVLC( uiCode, "num_long_term_sps"); - numLtrpInSPS = uiCode; - numOfLtrp += numLtrpInSPS; - rps->setNumberOfLongtermPictures(numOfLtrp); - } - int bitsForLtrpInSPS = 0; - while (sps->getNumLongTermRefPicSPS() > (1 << bitsForLtrpInSPS)) - { - bitsForLtrpInSPS++; - } - READ_UVLC( uiCode, "num_long_term_pics"); rps->setNumberOfLongtermPictures(uiCode); - numOfLtrp += uiCode; - rps->setNumberOfLongtermPictures(numOfLtrp); - int maxPicOrderCntLSB = 1 << sps->getBitsForPOC(); - int prevDeltaMSB = 0, deltaPocMSBCycleLT = 0; - for(int j=offset+rps->getNumberOfLongtermPictures()-1, k = 0; k < numOfLtrp; j--, k++) - { - int pocLsbLt; - if (k < numLtrpInSPS) + if (picHeader->getRPL( listIdx )->isRefPicLongterm(i)) { - uiCode = 0; - if (bitsForLtrpInSPS > 0) + if (picHeader->getRPL( listIdx )->getLtrpInSliceHeaderFlag()) { - READ_CODE(bitsForLtrpInSPS, uiCode, "lt_idx_sps[i]"); + READ_CODE(sps->getBitsForPOC(), uiCode, "pic_poc_lsb_lt[i][j]"); + picHeader->getLocalRPL( listIdx )->setRefPicIdentifier( i, uiCode, true, false, 0 ); } - bool usedByCurrFromSPS=sps->getUsedByCurrPicLtSPSFlag(uiCode); - - pocLsbLt = sps->getLtRefPicPocLsbSps(uiCode); - rps->setUsed(j,usedByCurrFromSPS); - } - else - { - READ_CODE(sps->getBitsForPOC(), uiCode, "poc_lsb_lt"); pocLsbLt= uiCode; - READ_FLAG( uiCode, "used_by_curr_pic_lt_flag"); rps->setUsed(j,uiCode); - } - READ_FLAG(uiCode,"delta_poc_msb_present_flag"); - bool mSBPresentFlag = uiCode ? 
true : false; - if(mSBPresentFlag) - { - READ_UVLC( uiCode, "delta_poc_msb_cycle_lt[i]" ); - bool deltaFlag = false; - // First LTRP || First LTRP from SH - if( (j == offset+rps->getNumberOfLongtermPictures()-1) || (j == offset+(numOfLtrp-numLtrpInSPS)-1) ) + READ_FLAG(uiCode, "pic_delta_poc_msb_present_flag[i][j]"); + picHeader->getLocalRPL( listIdx )->setDeltaPocMSBPresentFlag(i, uiCode ? true : false); + if (uiCode) { - deltaFlag = true; + READ_UVLC(uiCode, "pic_delta_poc_msb_cycle_lt[i][j]"); + picHeader->getLocalRPL( listIdx )->setDeltaPocMSBCycleLT(i, uiCode); } - if(deltaFlag) - { - deltaPocMSBCycleLT = uiCode; - } - else - { - deltaPocMSBCycleLT = uiCode + prevDeltaMSB; - } - - int pocLTCurr = pcSlice->getPOC() - deltaPocMSBCycleLT * maxPicOrderCntLSB - - iPOClsb + pocLsbLt; - rps->setPOC (j, pocLTCurr); - rps->setDeltaPOC(j, - pcSlice->getPOC() + pocLTCurr); - rps->setCheckLTMSBPresent(j,true); } - else - { - rps->setPOC (j, pocLsbLt); - rps->setDeltaPOC(j, - pcSlice->getPOC() + pocLsbLt); - rps->setCheckLTMSBPresent(j,false); + } + } + } + } - // reset deltaPocMSBCycleLT for first LTRP from slice header if MSB not present - if( j == offset+(numOfLtrp-numLtrpInSPS)-1 ) - { - deltaPocMSBCycleLT = 0; - } - } - prevDeltaMSB = deltaPocMSBCycleLT; + // partitioning constraint overrides + if (sps->getSplitConsOverrideEnabledFlag()) + { + READ_FLAG(uiCode, "partition_constraints_override_flag"); picHeader->setSplitConsOverrideFlag( uiCode != 0 ); + if (picHeader->getSplitConsOverrideFlag()) + { + unsigned minQT[3] = { 0, 0, 0 }; + unsigned maxBTD[3] = { 0, 0, 0 }; + unsigned maxBTSize[3] = { 0, 0, 0 }; + unsigned maxTTSize[3] = { 0, 0, 0 }; + unsigned ctbLog2SizeY = floorLog2(sps->getCTUSize()); + + READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_intra_slice_luma"); + unsigned minQtLog2SizeIntraY = uiCode + sps->getLog2MinCodingBlockSize(); + minQT[0] = 1 << minQtLog2SizeIntraY; + READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_inter_slice"); + unsigned 
minQtLog2SizeInterY = uiCode + sps->getLog2MinCodingBlockSize(); + minQT[1] = 1 << minQtLog2SizeInterY; + READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_inter_slice"); maxBTD[1] = uiCode; + READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_intra_slice_luma"); maxBTD[0] = uiCode; + + maxTTSize[0] = maxBTSize[0] = minQT[0]; + if (maxBTD[0] != 0) + { + READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_intra_slice_luma"); maxBTSize[0] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code"); + READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_intra_slice_luma"); maxTTSize[0] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code"); + } + maxTTSize[1] = maxBTSize[1] = minQT[1]; + if (maxBTD[1] != 0) + { + READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_inter_slice"); maxBTSize[1] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code"); + READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_inter_slice"); maxTTSize[1] <<= uiCode; + CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code"); + } + if (sps->getUseDualITree()) + { + READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_intra_slice_chroma"); minQT[2] = 1 << (uiCode + sps->getLog2MinCodingBlockSize()); + READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_intra_slice_chroma"); maxBTD[2] = uiCode; + maxTTSize[2] = maxBTSize[2] = minQT[2]; + if (maxBTD[2] != 0) + { + READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_intra_slice_chroma"); maxBTSize[2] <<= uiCode; + READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_intra_slice_chroma"); maxTTSize[2] <<= uiCode; } - offset += rps->getNumberOfLongtermPictures(); - rps->setNumberOfPictures(offset); } -#if !JVET_M0101_HLS - if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP ) + + picHeader->setMinQTSizes(minQT); + picHeader->setMaxMTTHierarchyDepths(maxBTD); + 
picHeader->setMaxBTSizes(maxBTSize); + picHeader->setMaxTTSizes(maxTTSize); + } + } + else + { + picHeader->setSplitConsOverrideFlag(0); + } + + // inherit constraint values from SPS + if (!sps->getSplitConsOverrideEnabledFlag() || !picHeader->getSplitConsOverrideFlag()) + { + picHeader->setMinQTSizes(sps->getMinQTSizes()); + picHeader->setMaxMTTHierarchyDepths(sps->getMaxMTTHierarchyDepths()); + picHeader->setMaxBTSizes(sps->getMaxBTSizes()); + picHeader->setMaxTTSizes(sps->getMaxTTSizes()); + } + + // delta quantization and chrom and chroma offset + if (pps->getUseDQP()) + { + READ_UVLC( uiCode, "pic_cu_qp_delta_subdiv_intra_slice" ); picHeader->setCuQpDeltaSubdivIntra( uiCode ); + READ_UVLC( uiCode, "pic_cu_qp_delta_subdiv_inter_slice" ); picHeader->setCuQpDeltaSubdivInter( uiCode ); + } + else + { + picHeader->setCuQpDeltaSubdivIntra( 0 ); + picHeader->setCuQpDeltaSubdivInter( 0 ); + } + if (pps->getCuChromaQpOffsetEnabledFlag()) + { + READ_UVLC( uiCode, "pic_cu_chroma_qp_offset_subdiv_intra_slice" ); picHeader->setCuChromaQpOffsetSubdivIntra( uiCode ); + READ_UVLC( uiCode, "pic_cu_chroma_qp_offset_subdiv_inter_slice" ); picHeader->setCuChromaQpOffsetSubdivInter( uiCode ); + } + else + { + picHeader->setCuChromaQpOffsetSubdivIntra( 0 ); + picHeader->setCuChromaQpOffsetSubdivInter( 0 ); + } + + // temporal motion vector prediction + if (sps->getSPSTemporalMVPEnabledFlag()) + { + READ_FLAG( uiCode, "pic_temporal_mvp_enabled_flag" ); + picHeader->setEnableTMVPFlag( uiCode != 0 ); + } + else + { + picHeader->setEnableTMVPFlag(false); + } + + // mvd L1 zero flag + if (!pps->getPPSMvdL1ZeroIdc()) + { + READ_FLAG(uiCode, "pic_mvd_l1_zero_flag"); + } + else + { + uiCode = pps->getPPSMvdL1ZeroIdc() - 1; + } + picHeader->setMvdL1ZeroFlag( uiCode != 0 ); + + // merge candidate list size + if (!pps->getPPSSixMinusMaxNumMergeCandPlus1()) + { + READ_UVLC(uiCode, "pic_six_minus_max_num_merge_cand"); + } + else + { + uiCode = pps->getPPSSixMinusMaxNumMergeCandPlus1() - 1; + } 
+ CHECK(MRG_MAX_NUM_CANDS <= uiCode, "Incorrrect max number of merge candidates!"); + picHeader->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - uiCode); + + // subblock merge candidate list size + if ( sps->getUseAffine() ) + { + READ_UVLC(uiCode, "pic_five_minus_max_num_subblock_merge_cand"); + CHECK(AFFINE_MRG_MAX_NUM_CANDS < uiCode, "Incorrrect max number of affine merge candidates!"); + picHeader->setMaxNumAffineMergeCand( AFFINE_MRG_MAX_NUM_CANDS - uiCode ); + } + else + { + picHeader->setMaxNumAffineMergeCand( sps->getSBTMVPEnabledFlag() && picHeader->getEnableTMVPFlag() ); + } + + // full-pel MMVD flag + if (sps->getFpelMmvdEnabledFlag()) + { + READ_FLAG( uiCode, "pic_fpel_mmvd_enabled_flag" ); + picHeader->setDisFracMMVD( uiCode != 0 ); + } + else + { + picHeader->setDisFracMMVD(false); + } + + // picture level BDOF disable flags + if (sps->getBdofControlPresentFlag()) + { + READ_FLAG(uiCode, "pic_disable_bdof_flag"); picHeader->setDisBdofFlag(uiCode != 0); + } + else + { + picHeader->setDisBdofFlag(0); + } + + // picture level DMVR disable flags + if (sps->getDmvrControlPresentFlag()) + { + READ_FLAG(uiCode, "pic_disable_dmvr_flag"); picHeader->setDisDmvrFlag(uiCode != 0); + } + else + { + picHeader->setDisDmvrFlag(0); + } + + // picture level PROF disable flags + if (sps->getProfControlPresentFlag()) + { + READ_FLAG(uiCode, "pic_disable_prof_flag"); picHeader->setDisProfFlag(uiCode != 0); + } + else + { + picHeader->setDisProfFlag(0); + } + + // triangle merge candidate list size + if (sps->getUseTriangle() && picHeader->getMaxNumMergeCand() >= 2) + { + if (!pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1()) + { + READ_UVLC(uiCode, "pic_max_num_merge_cand_minus_max_num_triangle_cand"); + } + else + { + uiCode = pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() - 1; + } + CHECK(picHeader->getMaxNumMergeCand() < uiCode, "Incorrrect max number of triangle candidates!"); + picHeader->setMaxNumTriangleCand((uint32_t)(picHeader->getMaxNumMergeCand() - 
uiCode)); + } + else + { + picHeader->setMaxNumTriangleCand(0); + } + + // ibc merge candidate list size + if (sps->getIBCFlag()) + { + READ_UVLC(uiCode, "pic_six_minus_max_num_ibc_merge_cand"); + CHECK(IBC_MRG_MAX_NUM_CANDS <= uiCode, "Incorrrect max number of IBC merge candidates!"); + picHeader->setMaxNumIBCMergeCand(IBC_MRG_MAX_NUM_CANDS - uiCode); + } + else + { + picHeader->setMaxNumIBCMergeCand(0); + } + + // joint Cb/Cr sign flag + if (sps->getJointCbCrEnabledFlag()) + { + READ_FLAG( uiCode, "pic_joint_cbcr_sign_flag" ); + picHeader->setJointCbCrSignFlag(uiCode != 0); + } + else + { + picHeader->setJointCbCrSignFlag(false); + } + + // sao enable flags + if(sps->getSAOEnabledFlag()) + { + READ_FLAG(uiCode, "pic_sao_enabled_present_flag"); + picHeader->setSaoEnabledPresentFlag(uiCode != 0); + + if (picHeader->getSaoEnabledPresentFlag()) + { + READ_FLAG(uiCode, "slice_sao_luma_flag"); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, uiCode != 0); + + if (sps->getChromaFormatIdc() != CHROMA_400) { - // In the case of BLA picture types, rps data is read from slice header but ignored - rps = pcSlice->getLocalRPS(); - (*rps)=ReferencePictureSet(); - pcSlice->setRPS(rps); + READ_FLAG(uiCode, "slice_sao_chroma_flag"); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, uiCode != 0); } -#endif - if (sps->getSPSTemporalMVPEnabledFlag()) + } + else + { + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, true); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, sps->getChromaFormatIdc() != CHROMA_400); + } + } + else + { + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, false); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false); + } + + // alf enable flags and aps IDs + if( sps->getALFEnabledFlag() ) + { + READ_FLAG(uiCode, "pic_alf_enabled_present_flag"); + picHeader->setAlfEnabledPresentFlag(uiCode != 0); + + if (picHeader->getAlfEnabledPresentFlag()) + { + READ_FLAG(uiCode, "pic_alf_enabled_flag"); + picHeader->setAlfEnabledFlag(COMPONENT_Y, uiCode); + + int 
alfChromaIdc = 0; + if (uiCode) { - READ_FLAG( uiCode, "slice_temporal_mvp_enabled_flag" ); - pcSlice->setEnableTMVPFlag( uiCode == 1 ? true : false ); + READ_CODE(3, uiCode, "pic_num_alf_aps_ids_luma"); + int numAps = uiCode; + picHeader->setNumAlfAps(numAps); + + std::vector<int> apsId(numAps, -1); + for (int i = 0; i < numAps; i++) + { + READ_CODE(3, uiCode, "pic_alf_aps_id_luma"); + apsId[i] = uiCode; + } + picHeader->setAlfAPSs(apsId); + + if (sps->getChromaFormatIdc() != CHROMA_400) + { + READ_CODE(2, uiCode, "pic_alf_chroma_idc"); + alfChromaIdc = uiCode; + } + else + { + alfChromaIdc = 0; + } + if (alfChromaIdc) + { + READ_CODE(3, uiCode, "pic_alf_aps_id_chroma"); + picHeader->setAlfApsIdChroma(uiCode); + } } else { - pcSlice->setEnableTMVPFlag(false); + picHeader->setNumAlfAps(0); } + picHeader->setAlfEnabledFlag(COMPONENT_Cb, alfChromaIdc & 1); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, alfChromaIdc >> 1); } - if(sps->getSAOEnabledFlag()) + else { - READ_FLAG(uiCode, "slice_sao_luma_flag"); pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, (bool)uiCode); + picHeader->setAlfEnabledFlag(COMPONENT_Y, true); + picHeader->setAlfEnabledFlag(COMPONENT_Cb, true); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, true); + } + } + else + { + picHeader->setAlfEnabledFlag(COMPONENT_Y, false); + picHeader->setAlfEnabledFlag(COMPONENT_Cb, false); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, false); + } - if (bChroma) + // dependent quantization + if (!pps->getPPSDepQuantEnabledIdc()) + { + READ_FLAG(uiCode, "pic_dep_quant_enabled_flag"); + } + else + { + uiCode = pps->getPPSDepQuantEnabledIdc() - 1; + } + picHeader->setDepQuantEnabledFlag( uiCode != 0 ); + + // sign data hiding + if( !picHeader->getDepQuantEnabledFlag() ) + { + READ_FLAG( uiCode, "pic_sign_data_hiding_enabled_flag" ); + picHeader->setSignDataHidingEnabledFlag( uiCode != 0 ); + } + else + { + picHeader->setSignDataHidingEnabledFlag(false); + } + + // deblocking filter controls + if 
(pps->getDeblockingFilterControlPresentFlag()) + { + if(pps->getDeblockingFilterOverrideEnabledFlag()) + { + READ_FLAG ( uiCode, "pic_deblocking_filter_override_present_flag" ); + picHeader->setDeblockingFilterOverridePresentFlag(uiCode != 0); + + if( picHeader->getDeblockingFilterOverridePresentFlag() ) { - READ_FLAG(uiCode, "slice_sao_chroma_flag"); pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, (bool)uiCode); + READ_FLAG ( uiCode, "pic_deblocking_filter_override_flag" ); + picHeader->setDeblockingFilterOverrideFlag(uiCode != 0); } + else + { + picHeader->setDeblockingFilterOverrideFlag(false); + } + } + else + { + picHeader->setDeblockingFilterOverridePresentFlag(false); + picHeader->setDeblockingFilterOverrideFlag(false); } - if( sps->getALFEnabledFlag() ) + if(picHeader->getDeblockingFilterOverrideFlag()) { - READ_FLAG(uiCode, "tile_group_alf_enabled_flag"); - if (uiCode) + READ_FLAG ( uiCode, "pic_deblocking_filter_disabled_flag" ); + picHeader->setDeblockingFilterDisable(uiCode != 0); + if(!picHeader->getDeblockingFilterDisable()) { - READ_CODE(5, uiCode, "tile_group_aps_id"); - pcSlice->setAPSId(uiCode); - pcSlice->setAPS(parameterSetManager->getAPS(uiCode)); - pcSlice->setTileGroupAlfEnabledFlag(true); + READ_SVLC( iCode, "pic_beta_offset_div2" ); + picHeader->setDeblockingFilterBetaOffsetDiv2(iCode); + CHECK( picHeader->getDeblockingFilterBetaOffsetDiv2() < -6 && + picHeader->getDeblockingFilterBetaOffsetDiv2() > 6, "Invalid deblocking filter configuration"); + + READ_SVLC( iCode, "pic_tc_offset_div2" ); + picHeader->setDeblockingFilterTcOffsetDiv2(iCode); + CHECK (picHeader->getDeblockingFilterTcOffsetDiv2() < -6 && + picHeader->getDeblockingFilterTcOffsetDiv2() > 6, "Invalid deblocking filter configuration"); + } + } + else + { + picHeader->setDeblockingFilterDisable ( pps->getPPSDeblockingFilterDisabledFlag() ); + picHeader->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() ); + picHeader->setDeblockingFilterTcOffsetDiv2 ( 
pps->getDeblockingFilterTcOffsetDiv2() ); + } + } + else + { + picHeader->setDeblockingFilterDisable ( false ); + picHeader->setDeblockingFilterBetaOffsetDiv2( 0 ); + picHeader->setDeblockingFilterTcOffsetDiv2 ( 0 ); + } + + // luma mapping / chroma scaling controls + if (sps->getUseLmcs()) + { + READ_FLAG(uiCode, "pic_lmcs_enabled_flag"); + picHeader->setLmcsEnabledFlag(uiCode != 0); + + if (picHeader->getLmcsEnabledFlag()) + { + READ_CODE(2, uiCode, "pic_lmcs_aps_id"); + picHeader->setLmcsAPSId(uiCode); + + if (sps->getChromaFormatIdc() != CHROMA_400) + { + READ_FLAG(uiCode, "pic_chroma_residual_scale_flag"); + picHeader->setLmcsChromaResidualScaleFlag(uiCode != 0); } else { - pcSlice->setTileGroupAlfEnabledFlag(false); - pcSlice->setAPSId(-1); - pcSlice->setAPS(nullptr); + picHeader->setLmcsChromaResidualScaleFlag(false); } } + } + else + { + picHeader->setLmcsEnabledFlag(false); + picHeader->setLmcsChromaResidualScaleFlag(false); + } - if (pcSlice->getIdrPicFlag()) + // quantization scaling lists + if( sps->getScalingListFlag() ) + { + READ_FLAG( uiCode, "pic_scaling_list_present_flag" ); + picHeader->setScalingListPresentFlag( uiCode ); + if( picHeader->getScalingListPresentFlag() ) { - pcSlice->setEnableTMVPFlag(false); + READ_CODE( 3, uiCode, "pic_scaling_list_aps_id" ); + picHeader->setScalingListAPSId( uiCode ); } - if (!pcSlice->isIntra()) + } + else + { + picHeader->setScalingListPresentFlag( false ); + } + + // picture header extension + if(pps->getPictureHeaderExtensionPresentFlag()) + { + READ_UVLC(uiCode,"pic_segment_header_extension_length"); + for(int i=0; i<uiCode; i++) { + uint32_t ignore_; + READ_CODE(8,ignore_,"pic_segment_header_extension_data_byte"); + } + } - READ_FLAG( uiCode, "num_ref_idx_active_override_flag"); - if (uiCode) + xReadRbspTrailingBits(); +} + +void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC) +{ + uint32_t uiCode; + int iCode; + +#if 
ENABLE_TRACING + xTraceSliceHeader(); +#endif + PPS* pps = NULL; + SPS* sps = NULL; + + CHECK(picHeader==0, "Invalid Picture Header"); + CHECK(picHeader->isValid()==false, "Invalid Picture Header"); + pps = parameterSetManager->getPPS( picHeader->getPPSId() ); + //!KS: need to add error handling code here, if PPS is not available + CHECK(pps==0, "Invalid PPS"); + sps = parameterSetManager->getSPS(pps->getSPSId()); + //!KS: need to add error handling code here, if SPS is not available + CHECK(sps==0, "Invalid SPS"); + + const ChromaFormat chFmt = sps->getChromaFormatIdc(); + const uint32_t numValidComp=getNumberValidComponents(chFmt); + const bool bChroma=(chFmt!=CHROMA_400); + + // picture order count + READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb"); + if (pcSlice->getIdrPicFlag()) + { + pcSlice->setPOC(uiCode); + } + else + { + int iPOClsb = uiCode; + int iPrevPOC = prevTid0POC; + int iMaxPOClsb = 1 << sps->getBitsForPOC(); + int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1); + int iPrevPOCmsb = iPrevPOC - iPrevPOClsb; + int iPOCmsb; + if ((iPOClsb < iPrevPOClsb) && ((iPrevPOClsb - iPOClsb) >= (iMaxPOClsb / 2))) + { + iPOCmsb = iPrevPOCmsb + iMaxPOClsb; + } + else if ((iPOClsb > iPrevPOClsb) && ((iPOClsb - iPrevPOClsb) > (iMaxPOClsb / 2))) + { + iPOCmsb = iPrevPOCmsb - iMaxPOClsb; + } + else + { + iPOCmsb = iPrevPOCmsb; + } + pcSlice->setPOC(iPOCmsb + iPOClsb); + } + + if (sps->getSubPicPresentFlag()) + { + uint32_t bitsSubPicId; + if (sps->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = sps->getSubPicIdLen(); + } + else if (picHeader->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = picHeader->getSubPicIdLen(); + } + else if (pps->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = pps->getSubPicIdLen(); + } + else + { + bitsSubPicId = ceilLog2(sps->getNumSubPics()); + } + READ_CODE(bitsSubPicId, uiCode, "slice_subpic_id"); pcSlice->setSliceSubPicId(uiCode); + } + + // raster scan slices + if(pps->getRectSliceFlag() == 0) + { 
+ uint32_t sliceAddr, numTilesInSlice; + + // slice address is the raster scan tile index of first tile in slice + if( pps->getNumTiles() > 1 ) + { + int bitsSliceAddress = ceilLog2(pps->getNumTiles()); + READ_CODE(bitsSliceAddress, uiCode, "slice_address"); sliceAddr = uiCode; + READ_UVLC(uiCode, "num_tiles_in_slice_minus1"); numTilesInSlice = uiCode + 1; + } + else { + sliceAddr = 0; + numTilesInSlice = 1; + } + CHECK(sliceAddr >= pps->getNumTiles(), "Invalid slice address"); + pcSlice->initSliceMap(); + pcSlice->setSliceID(sliceAddr); + + for( uint32_t tileIdx = sliceAddr; tileIdx < sliceAddr + numTilesInSlice; tileIdx++ ) + { + uint32_t tileX = tileIdx % pps->getNumTileColumns(); + uint32_t tileY = tileIdx / pps->getNumTileColumns(); + CHECK(tileY >= pps->getNumTileRows(), "Number of tiles in slice exceeds the remaining number of tiles in picture"); + + pcSlice->addCtusToSlice(pps->getTileColumnBd(tileX), pps->getTileColumnBd(tileX + 1), + pps->getTileRowBd(tileY), pps->getTileRowBd(tileY + 1), pps->getPicWidthInCtu()); + } + } + // rectangular slices + else + { + uint32_t sliceAddr; + + // slice address is the index of the slice within the current sub-picture + if( pps->getNumSlicesInPic() > 1 ) + { + int bitsSliceAddress = ceilLog2(pps->getNumSlicesInPic()); // change to NumSlicesInSubPic when available + READ_CODE(bitsSliceAddress, uiCode, "slice_address"); sliceAddr = uiCode; + CHECK(sliceAddr >= pps->getNumSlicesInPic(), "Invalid slice address"); + } + else { + sliceAddr = 0; + } + pcSlice->setSliceMap( pps->getSliceMap(sliceAddr) ); + pcSlice->setSliceID(sliceAddr); + } + + + READ_UVLC ( uiCode, "slice_type" ); pcSlice->setSliceType((SliceType)uiCode); + + // inherit values from picture header + // set default values in case slice overrides are disabled + pcSlice->inheritFromPicHeader( picHeader, pps, sps ); + + + if( picHeader->getPicRplPresentFlag() ) + { + pcSlice->setRPL0(picHeader->getRPL0()); + pcSlice->setRPL1(picHeader->getRPL1()); + 
*pcSlice->getLocalRPL0() = *picHeader->getLocalRPL0(); + *pcSlice->getLocalRPL1() = *picHeader->getLocalRPL1(); + } + else if( pcSlice->getIdrPicFlag() && !(sps->getIDRRefParamListPresent()) ) + { + ReferencePictureList* rpl0 = pcSlice->getLocalRPL0(); + (*rpl0) = ReferencePictureList(); + pcSlice->setRPL0(rpl0); + ReferencePictureList* rpl1 = pcSlice->getLocalRPL1(); + (*rpl1) = ReferencePictureList(); + pcSlice->setRPL1(rpl1); + } + else + { + //Read L0 related syntax elements + if (sps->getNumRPL0() > 0) { - READ_UVLC (uiCode, "num_ref_idx_l0_active_minus1" ); pcSlice->setNumRefIdx( REF_PIC_LIST_0, uiCode + 1 ); - if (pcSlice->isInterB()) + if (!pps->getPPSRefPicListSPSIdc0()) { - READ_UVLC (uiCode, "num_ref_idx_l1_active_minus1" ); pcSlice->setNumRefIdx( REF_PIC_LIST_1, uiCode + 1 ); + READ_FLAG(uiCode, "ref_pic_list_sps_flag[0]"); } else { - pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0); + uiCode = pps->getPPSRefPicListSPSIdc0() - 1; } } else { - pcSlice->setNumRefIdx(REF_PIC_LIST_0, pps->getNumRefIdxL0DefaultActive()); - if (pcSlice->isInterB()) + uiCode = 0; + } + + if (!uiCode) //explicitly carried in this SH + { + ReferencePictureList* rpl0 = pcSlice->getLocalRPL0(); + (*rpl0) = ReferencePictureList(); + parseRefPicList(sps, rpl0); + pcSlice->setRPL0idx(-1); + pcSlice->setRPL0(rpl0); + } + else //Refer to list in SPS + { + if (sps->getNumRPL0() > 1) { - pcSlice->setNumRefIdx(REF_PIC_LIST_1, pps->getNumRefIdxL1DefaultActive()); + int numBits = ceilLog2(sps->getNumRPL0()); + READ_CODE(numBits, uiCode, "ref_pic_list_idx[0]"); + pcSlice->setRPL0idx(uiCode); + pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(uiCode)); } else { - pcSlice->setNumRefIdx(REF_PIC_LIST_1,0); + pcSlice->setRPL0idx(0); + pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(0)); } } - } - // } - RefPicListModification* refPicListModification = pcSlice->getRefPicListModification(); - if(!pcSlice->isIntra()) - { - if( !pps->getListsModificationPresentFlag() || 
pcSlice->getNumRpsCurrTempList() <= 1 ) + //Deal POC Msb cycle signalling for LTRP + for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++) { - refPicListModification->setRefPicListModificationFlagL0( 0 ); + pcSlice->getLocalRPL0()->setDeltaPocMSBPresentFlag(i, false); + pcSlice->getLocalRPL0()->setDeltaPocMSBCycleLT(i, 0); } - else + if (pcSlice->getRPL0()->getNumberOfLongtermPictures()) { - READ_FLAG( uiCode, "ref_pic_list_modification_flag_l0" ); refPicListModification->setRefPicListModificationFlagL0( uiCode ? 1 : 0 ); + for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++) + { + if (pcSlice->getRPL0()->isRefPicLongterm(i)) + { + if (pcSlice->getRPL0()->getLtrpInSliceHeaderFlag()) + { + READ_CODE(sps->getBitsForPOC(), uiCode, "slice_poc_lsb_lt[i][j]"); + pcSlice->getLocalRPL0()->setRefPicIdentifier( i, uiCode, true, false, 0 ); + } + READ_FLAG(uiCode, "delta_poc_msb_present_flag[i][j]"); + pcSlice->getLocalRPL0()->setDeltaPocMSBPresentFlag(i, uiCode ? 
true : false); + if (uiCode) + { + READ_UVLC(uiCode, "slice_delta_poc_msb_cycle_lt[i][j]"); + pcSlice->getLocalRPL0()->setDeltaPocMSBCycleLT(i, uiCode); + } + } + } } - if(refPicListModification->getRefPicListModificationFlagL0()) + //Read L1 related syntax elements + if (!pps->getRpl1IdxPresentFlag()) { - uiCode = 0; - int i = 0; - int numRpsCurrTempList0 = pcSlice->getNumRpsCurrTempList(); - if ( numRpsCurrTempList0 > 1 ) + pcSlice->setRPL1idx(pcSlice->getRPL0idx()); + if (pcSlice->getRPL1idx() != -1) + pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(pcSlice->getRPL0idx())); + } + else + { + if (sps->getNumRPL1() > 0) { - int length = 1; - numRpsCurrTempList0 --; - while ( numRpsCurrTempList0 >>= 1) + if (!pps->getPPSRefPicListSPSIdc1()) { - length ++; + READ_FLAG(uiCode, "ref_pic_list_sps_flag[1]"); } - for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_0); i ++) + else { - READ_CODE( length, uiCode, "list_entry_l0" ); - refPicListModification->setRefPicSetIdxL0(i, uiCode ); + uiCode = pps->getPPSRefPicListSPSIdc1() - 1; } } else { - for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_0); i ++) + uiCode = 0; + } + if (uiCode == 1) + { + if (sps->getNumRPL1() > 1) { - refPicListModification->setRefPicSetIdxL0(i, 0 ); + int numBits = ceilLog2(sps->getNumRPL1()); + READ_CODE(numBits, uiCode, "ref_pic_list_idx[1]"); + pcSlice->setRPL1idx(uiCode); + pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(uiCode)); } + else + { + pcSlice->setRPL1idx(0); + pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(0)); + } + } + else + { + pcSlice->setRPL1idx(-1); } } - } - else - { - refPicListModification->setRefPicListModificationFlagL0(0); - } - if(pcSlice->isInterB()) - { - if( !pps->getListsModificationPresentFlag() || pcSlice->getNumRpsCurrTempList() <= 1 ) + if (pcSlice->getRPL1idx() == -1) //explicitly carried in this SH { - refPicListModification->setRefPicListModificationFlagL1( 0 ); + ReferencePictureList* rpl1 = pcSlice->getLocalRPL1(); 
+ (*rpl1) = ReferencePictureList(); + parseRefPicList(sps, rpl1); + pcSlice->setRPL1idx(-1); + pcSlice->setRPL1(rpl1); } - else + + //Deal POC Msb cycle signalling for LTRP + for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++) { - READ_FLAG( uiCode, "ref_pic_list_modification_flag_l1" ); refPicListModification->setRefPicListModificationFlagL1( uiCode ? 1 : 0 ); + pcSlice->getLocalRPL1()->setDeltaPocMSBPresentFlag(i, false); + pcSlice->getLocalRPL1()->setDeltaPocMSBCycleLT(i, 0); } - if(refPicListModification->getRefPicListModificationFlagL1()) + if (pcSlice->getRPL1()->getNumberOfLongtermPictures()) { - uiCode = 0; - int i = 0; - int numRpsCurrTempList1 = pcSlice->getNumRpsCurrTempList(); - if ( numRpsCurrTempList1 > 1 ) + for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++) { - int length = 1; - numRpsCurrTempList1 --; - while ( numRpsCurrTempList1 >>= 1) + if (pcSlice->getRPL1()->isRefPicLongterm(i)) { - length ++; + if (pcSlice->getRPL1()->getLtrpInSliceHeaderFlag()) + { + READ_CODE(sps->getBitsForPOC(), uiCode, "slice_poc_lsb_lt[i][j]"); + pcSlice->getLocalRPL1()->setRefPicIdentifier( i, uiCode, true, false, 0 ); + } + READ_FLAG(uiCode, "delta_poc_msb_present_flag[i][j]"); + pcSlice->getLocalRPL1()->setDeltaPocMSBPresentFlag(i, uiCode ? 
true : false); + if (uiCode) + { + READ_UVLC(uiCode, "slice_delta_poc_msb_cycle_lt[i][j]"); + pcSlice->getLocalRPL1()->setDeltaPocMSBCycleLT(i, uiCode); + } + } + } + } + + } + if( !picHeader->getPicRplPresentFlag() && pcSlice->getIdrPicFlag() && !(sps->getIDRRefParamListPresent())) + { + pcSlice->setNumRefIdx(REF_PIC_LIST_0, 0); + pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0); + } + else + { + if ((!pcSlice->isIntra() && pcSlice->getRPL0()->getNumRefEntries() > 1) || + (pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1) ) + { + READ_FLAG( uiCode, "num_ref_idx_active_override_flag"); + if (uiCode) + { + if(pcSlice->getRPL0()->getNumRefEntries() > 1) + { + READ_UVLC (uiCode, "num_ref_idx_l0_active_minus1" ); + } + else + { + uiCode = 0; } - for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_1); i ++) + pcSlice->setNumRefIdx( REF_PIC_LIST_0, uiCode + 1 ); + if (pcSlice->isInterB()) { - READ_CODE( length, uiCode, "list_entry_l1" ); - refPicListModification->setRefPicSetIdxL1(i, uiCode ); + if(pcSlice->getRPL1()->getNumRefEntries() > 1) + { + READ_UVLC (uiCode, "num_ref_idx_l1_active_minus1" ); + } + else + { + uiCode = 0; + } + pcSlice->setNumRefIdx(REF_PIC_LIST_1, uiCode + 1); + } + else + { + pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0); + } + } + else + { + if(pcSlice->getRPL0()->getNumRefEntries() >= pps->getNumRefIdxL0DefaultActive()) + { + pcSlice->setNumRefIdx(REF_PIC_LIST_0, pps->getNumRefIdxL0DefaultActive()); + } + else + { + pcSlice->setNumRefIdx(REF_PIC_LIST_0, pcSlice->getRPL0()->getNumRefEntries()); + } + + if (pcSlice->isInterB()) + { + if(pcSlice->getRPL1()->getNumRefEntries() >= pps->getNumRefIdxL1DefaultActive()) + { + pcSlice->setNumRefIdx(REF_PIC_LIST_1, pps->getNumRefIdxL1DefaultActive()); + } + else + { + pcSlice->setNumRefIdx(REF_PIC_LIST_1, pcSlice->getRPL1()->getNumRefEntries()); + } } - } - else - { - for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_1); i ++) + else { - refPicListModification->setRefPicSetIdxL1(i, 0 ); + 
pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0); } } } + else + { + pcSlice->setNumRefIdx( REF_PIC_LIST_0, pcSlice->isIntra() ? 0 : 1 ); + pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0 ); + } } - else - { - refPicListModification->setRefPicListModificationFlagL1(0); - } - if (pcSlice->isInterB()) + + if (pcSlice->isInterP() || pcSlice->isInterB()) { - READ_FLAG( uiCode, "mvd_l1_zero_flag" ); pcSlice->setMvdL1ZeroFlag( (uiCode ? true : false) ); + CHECK(pcSlice->getNumRefIdx(REF_PIC_LIST_0) == 0, "Number of active entries in RPL0 of P or B picture shall be greater than 0"); + if (pcSlice->isInterB()) + CHECK(pcSlice->getNumRefIdx(REF_PIC_LIST_1) == 0, "Number of active entries in RPL1 of B picture shall be greater than 0"); } + pcSlice->setCabacInitFlag( false ); // default if(pps->getCabacInitPresentFlag() && !pcSlice->isIntra()) { @@ -1777,11 +2777,18 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setEncCABACTableIdx( pcSlice->getSliceType() == B_SLICE ? ( uiCode ? P_SLICE : B_SLICE ) : ( uiCode ? 
B_SLICE : P_SLICE ) ); } - if ( pcSlice->getEnableTMVPFlag() ) + if ( pcSlice->getPicHeader()->getEnableTMVPFlag() ) { if ( pcSlice->getSliceType() == B_SLICE ) { - READ_FLAG( uiCode, "collocated_from_l0_flag" ); + if (!pps->getPPSCollocatedFromL0Idc()) + { + READ_FLAG(uiCode, "collocated_from_l0_flag"); + } + else + { + uiCode = pps->getPPSCollocatedFromL0Idc() - 1; + } pcSlice->setColFromL0Flag(uiCode); } else @@ -1806,87 +2813,22 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para parsePredWeightTable(pcSlice, sps); pcSlice->initWpScaling(sps); } - READ_FLAG( uiCode, "dep_quant_enabled_flag" ); - pcSlice->setDepQuantEnabledFlag( uiCode != 0 ); -#if HEVC_USE_SIGN_HIDING - if( !pcSlice->getDepQuantEnabledFlag() ) - { - READ_FLAG( uiCode, "sign_data_hiding_enabled_flag" ); - pcSlice->setSignDataHidingEnabledFlag( uiCode != 0 ); - } else { - pcSlice->setSignDataHidingEnabledFlag( 0 ); - } -#endif - if ( - sps->getSplitConsOverrideEnabledFlag() - ) - { - READ_FLAG(uiCode, "partition_constrainst_override_flag"); pcSlice->setSplitConsOverrideFlag(uiCode ? true : false); - if (pcSlice->getSplitConsOverrideFlag()) + WPScalingParam *wp; + for ( int iNumRef=0 ; iNumRef<((pcSlice->getSliceType() == B_SLICE )?2:1); iNumRef++ ) { - READ_UVLC(uiCode, "log2_diff_min_qt_min_cb"); pcSlice->setMinQTSize(1 << (uiCode + sps->getLog2MinCodingBlockSize())); - READ_UVLC(uiCode, "max_mtt_hierarchy_depth"); pcSlice->setMaxBTDepth(uiCode); - if (pcSlice->getMaxBTDepth() != 0) + RefPicList eRefPicList = ( iNumRef ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0 ); + for ( int iRefIdx=0 ; iRefIdx<pcSlice->getNumRefIdx(eRefPicList) ; iRefIdx++ ) { - READ_UVLC(uiCode, "log2_diff_max_bt_min_qt"); pcSlice->setMaxBTSize(pcSlice->getMinQTSize() << uiCode); - READ_UVLC(uiCode, "log2_diff_max_tt_min_qt"); pcSlice->setMaxTTSize(pcSlice->getMinQTSize() << uiCode); - } - else - { - pcSlice->setMaxBTSize(pcSlice->getMinQTSize()); - pcSlice->setMaxTTSize(pcSlice->getMinQTSize()); - } - if ( - pcSlice->isIntra() && sps->getUseDualITree() - ) - { - READ_UVLC(uiCode, "log2_diff_min_qt_min_cb_chroma"); pcSlice->setMinQTSizeIChroma(1 << (uiCode + sps->getLog2MinCodingBlockSize())); - READ_UVLC(uiCode, "max_mtt_hierarchy_depth_chroma"); pcSlice->setMaxBTDepthIChroma(uiCode); - if (pcSlice->getMaxBTDepthIChroma() != 0) - { - READ_UVLC(uiCode, "log2_diff_max_bt_min_qt_chroma"); pcSlice->setMaxBTSizeIChroma(pcSlice->getMinQTSizeIChroma() << uiCode); - READ_UVLC(uiCode, "log2_diff_max_tt_min_qt_chroma"); pcSlice->setMaxTTSizeIChroma(pcSlice->getMinQTSizeIChroma() << uiCode); - } - else - { - pcSlice->setMaxBTSizeIChroma(pcSlice->getMinQTSizeIChroma()); - pcSlice->setMaxTTSizeIChroma(pcSlice->getMinQTSizeIChroma()); - } + pcSlice->getWpScaling(eRefPicList, iRefIdx, wp); + wp[0].bPresentFlag = false; + wp[1].bPresentFlag = false; + wp[2].bPresentFlag = false; } } } - if (!pcSlice->isIntra() || sps->getIBCFlag()) - { - READ_UVLC(uiCode, "six_minus_max_num_merge_cand"); - pcSlice->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - uiCode); - } - - if (!pcSlice->isIntra()) - { - - if ( sps->getSBTMVPEnabledFlag() && !sps->getUseAffine() ) // ATMVP only - { - pcSlice->setMaxNumAffineMergeCand( 1 ); - } - else if ( !sps->getSBTMVPEnabledFlag() && !sps->getUseAffine() ) // both off - { - pcSlice->setMaxNumAffineMergeCand( 0 ); - } - else - if ( sps->getUseAffine() ) - { - READ_UVLC( uiCode, "five_minus_max_num_affine_merge_cand" ); - pcSlice->setMaxNumAffineMergeCand( AFFINE_MRG_MAX_NUM_CANDS - uiCode ); - } - if ( 
sps->getDisFracMmvdEnabledFlag() ) - { - READ_FLAG( uiCode, "tile_group_fracmmvd_disabled_flag" ); - pcSlice->setDisFracMMVD( uiCode ? true : false ); - } - } READ_SVLC( iCode, "slice_qp_delta" ); pcSlice->setSliceQp (26 + pps->getPicInitQPMinus26() + iCode); @@ -1915,10 +2857,19 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para CHECK( pcSlice->getSliceChromaQpDelta(COMPONENT_Cr) > 12, "Invalid chroma QP offset" ); CHECK( (pps->getQpOffset(COMPONENT_Cr) + pcSlice->getSliceChromaQpDelta(COMPONENT_Cr)) < -12, "Invalid chroma QP offset" ); CHECK( (pps->getQpOffset(COMPONENT_Cr) + pcSlice->getSliceChromaQpDelta(COMPONENT_Cr)) > 12, "Invalid chroma QP offset" ); + if (sps->getJointCbCrEnabledFlag()) + { + READ_SVLC(iCode, "slice_joint_cbcr_qp_offset" ); + pcSlice->setSliceChromaQpDelta(JOINT_CbCr, iCode); + CHECK( pcSlice->getSliceChromaQpDelta(JOINT_CbCr) < -12, "Invalid chroma QP offset"); + CHECK( pcSlice->getSliceChromaQpDelta(JOINT_CbCr) > 12, "Invalid chroma QP offset"); + CHECK( (pps->getQpOffset(JOINT_CbCr) + pcSlice->getSliceChromaQpDelta(JOINT_CbCr)) < -12, "Invalid chroma QP offset"); + CHECK( (pps->getQpOffset(JOINT_CbCr) + pcSlice->getSliceChromaQpDelta(JOINT_CbCr)) > 12, "Invalid chroma QP offset"); + } } } - if (pps->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag()) + if (pps->getCuChromaQpOffsetEnabledFlag()) { READ_FLAG(uiCode, "cu_chroma_qp_offset_enabled_flag"); pcSlice->setUseChromaQpAdj(uiCode != 0); } @@ -1927,11 +2878,67 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setUseChromaQpAdj(false); } + if( sps->getSAOEnabledFlag() && !picHeader->getSaoEnabledPresentFlag() ) + { + READ_FLAG(uiCode, "slice_sao_luma_flag"); pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, (bool)uiCode); + + if (bChroma) + { + READ_FLAG(uiCode, "slice_sao_chroma_flag"); pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, (bool)uiCode); + } + } + + if( sps->getALFEnabledFlag() && 
!picHeader->getAlfEnabledPresentFlag() ) + { + READ_FLAG(uiCode, "slice_alf_enabled_flag"); + pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Y, uiCode); + int alfChromaIdc = 0; + if (uiCode) + { + READ_CODE(3, uiCode, "slice_num_alf_aps_ids_luma"); + int numAps = uiCode; + pcSlice->setTileGroupNumAps(numAps); + std::vector<int> apsId(numAps, -1); + for (int i = 0; i < numAps; i++) + { + READ_CODE(3, uiCode, "slice_alf_aps_id_luma"); + apsId[i] = uiCode; + APS* APStoCheckLuma = parameterSetManager->getAPS(apsId[i], ALF_APS); + CHECK(APStoCheckLuma->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] != 1, "bitstream conformance error, alf_luma_filter_signal_flag shall be equal to 1"); + } + + + pcSlice->setAlfAPSs(apsId); + if (bChroma) + { + READ_CODE(2, uiCode, "slice_alf_chroma_idc"); alfChromaIdc = uiCode; + } + else + { + alfChromaIdc = 0; + } + if (alfChromaIdc) + { + READ_CODE(3, uiCode, "slice_alf_aps_id_chroma"); + pcSlice->setTileGroupApsIdChroma(uiCode); + APS* APStoCheckChroma = parameterSetManager->getAPS(uiCode, ALF_APS); + CHECK(APStoCheckChroma->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] != 1, "bitstream conformance error, alf_chroma_filter_signal_flag shall be equal to 1"); + } + } + else + { + pcSlice->setTileGroupNumAps(0); + } + pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, alfChromaIdc & 1); + pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, alfChromaIdc >> 1); + } + + if (pps->getDeblockingFilterControlPresentFlag()) { - if(pps->getDeblockingFilterOverrideEnabledFlag()) + if( pps->getDeblockingFilterOverrideEnabledFlag() && !picHeader->getDeblockingFilterOverridePresentFlag() ) { - READ_FLAG ( uiCode, "deblocking_filter_override_flag" ); pcSlice->setDeblockingFilterOverrideFlag(uiCode ? true : false); + READ_FLAG ( uiCode, "slice_deblocking_filter_override_flag" ); pcSlice->setDeblockingFilterOverrideFlag(uiCode ? 
true : false); } else { @@ -1952,9 +2959,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para } else { - pcSlice->setDeblockingFilterDisable ( pps->getPPSDeblockingFilterDisabledFlag() ); - pcSlice->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() ); - pcSlice->setDeblockingFilterTcOffsetDiv2 ( pps->getDeblockingFilterTcOffsetDiv2() ); + pcSlice->setDeblockingFilterDisable ( picHeader->getDeblockingFilterDisable() ); + pcSlice->setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() ); + pcSlice->setDeblockingFilterTcOffsetDiv2 ( picHeader->getDeblockingFilterTcOffsetDiv2() ); } } else @@ -1964,28 +2971,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setDeblockingFilterTcOffsetDiv2 ( 0 ); } - bool isSAOEnabled = sps->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (bChroma && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); - bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable()); - - if(pps->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled )) - { - READ_FLAG( uiCode, "slice_loop_filter_across_slices_enabled_flag"); - } - else - { - uiCode = pps->getLoopFilterAcrossSlicesEnabledFlag()?1:0; - } - pcSlice->setLFCrossSliceBoundaryFlag( (uiCode==1)?true:false); - if (sps->getUseReshaper()) - { - parseReshaper(pcSlice->getReshapeInfo(), sps, pcSlice->isIntra()); - } -#if HEVC_DEPENDENT_SLICES - } -#endif - if( firstSliceSegmentInPic ) + if( pcSlice->getFirstCtuRsAddrInSlice() == 0 ) { pcSlice->setDefaultClpRng( *sps ); @@ -2001,26 +2989,20 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para } } - -#if HEVC_TILES_WPP std::vector<uint32_t> entryPointOffset; - if( pps->getTilesEnabledFlag() || pps->getEntropyCodingSyncEnabledFlag() ) + + pcSlice->setNumEntryPoints( pps ); + if( pcSlice->getNumEntryPoints() > 0 ) { - uint32_t numEntryPointOffsets; uint32_t 
offsetLenMinus1; - READ_UVLC( numEntryPointOffsets, "num_entry_point_offsets" ); - if( numEntryPointOffsets > 0 ) + READ_UVLC( offsetLenMinus1, "offset_len_minus1" ); + entryPointOffset.resize( pcSlice->getNumEntryPoints() ); + for( uint32_t idx = 0; idx < pcSlice->getNumEntryPoints(); idx++ ) { - READ_UVLC( offsetLenMinus1, "offset_len_minus1" ); - entryPointOffset.resize( numEntryPointOffsets ); - for( uint32_t idx = 0; idx < numEntryPointOffsets; idx++ ) - { - READ_CODE( offsetLenMinus1 + 1, uiCode, "entry_point_offset_minus1" ); - entryPointOffset[idx] = uiCode + 1; - } + READ_CODE( offsetLenMinus1 + 1, uiCode, "entry_point_offset_minus1" ); + entryPointOffset[idx] = uiCode + 1; } } -#endif #if RExt__DECODER_DEBUG_BIT_STATISTICS CodingStatistics::IncrementStatisticEP(STATS__BYTE_ALIGNMENT_BITS,m_pcBitstream->readByteAlignment(),0); @@ -2030,8 +3012,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->clearSubstreamSizes(); -#if HEVC_TILES_WPP - if( pps->getTilesEnabledFlag() || pps->getEntropyCodingSyncEnabledFlag() ) + if( pcSlice->getNumEntryPoints() > 0 ) { int endOfSliceHeaderLocation = m_pcBitstream->getByteLocation(); @@ -2065,11 +3046,54 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->addSubstreamSize(entryPointOffset [ idx ] ); } } -#endif return; } -#if JVET_M0101_HLS +void HLSyntaxReader::parseSliceHeaderToPoc (Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC) +{ + uint32_t uiCode; + PPS* pps = NULL; + SPS* sps = NULL; + + CHECK(picHeader==0, "Invalid Picture Header"); + CHECK(picHeader->isValid()==false, "Invalid Picture Header"); + pps = parameterSetManager->getPPS( picHeader->getPPSId() ); + //!KS: need to add error handling code here, if PPS is not available + CHECK(pps==0, "Invalid PPS"); + sps = parameterSetManager->getSPS(pps->getSPSId()); + //!KS: need to add error handling code here, if SPS is not 
available + CHECK(sps==0, "Invalid SPS"); + + // picture order count + READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb"); + if (pcSlice->getIdrPicFlag()) + { + pcSlice->setPOC(uiCode); + } + else + { + int iPOClsb = uiCode; + int iPrevPOC = prevTid0POC; + int iMaxPOClsb = 1 << sps->getBitsForPOC(); + int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1); + int iPrevPOCmsb = iPrevPOC - iPrevPOClsb; + int iPOCmsb; + if ((iPOClsb < iPrevPOClsb) && ((iPrevPOClsb - iPOClsb) >= (iMaxPOClsb / 2))) + { + iPOCmsb = iPrevPOCmsb + iMaxPOClsb; + } + else if ((iPOClsb > iPrevPOClsb) && ((iPOClsb - iPrevPOClsb) > (iMaxPOClsb / 2))) + { + iPOCmsb = iPrevPOCmsb - iMaxPOClsb; + } + else + { + iPOCmsb = iPrevPOCmsb; + } + pcSlice->setPOC(iPOCmsb + iPOClsb); + } +} + void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo) { uint32_t symbol; @@ -2081,27 +3105,42 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo) READ_CODE(4, symbol, "max_bitdepth_constraint_idc" ); cinfo->setMaxBitDepthConstraintIdc(symbol); READ_CODE(2, symbol, "max_chroma_format_constraint_idc" ); cinfo->setMaxChromaFormatConstraintIdc((ChromaFormat)symbol); - + READ_FLAG(symbol, "no_qtbtt_dual_tree_intra_constraint_flag" ); cinfo->setNoQtbttDualTreeIntraConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_partition_constraints_override_constraint_flag"); cinfo->setNoPartitionConstraintsOverrideConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_sao_constraint_flag"); cinfo->setNoSaoConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_alf_constraint_flag"); cinfo->setNoAlfConstraintFlag(symbol > 0 ? true : false); - READ_FLAG(symbol, "no_pcm_constraint_flag"); cinfo->setNoPcmConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_joint_cbcr_constraint_flag"); cinfo->setNoJointCbCrConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_ref_wraparound_constraint_flag"); cinfo->setNoRefWraparoundConstraintFlag(symbol > 0 ? 
true : false); READ_FLAG(symbol, "no_temporal_mvp_constraint_flag"); cinfo->setNoTemporalMvpConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_sbtmvp_constraint_flag"); cinfo->setNoSbtmvpConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_amvr_constraint_flag"); cinfo->setNoAmvrConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_bdof_constraint_flag"); cinfo->setNoBdofConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_dmvr_constraint_flag"); cinfo->setNoDmvrConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_cclm_constraint_flag"); cinfo->setNoCclmConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_mts_constraint_flag"); cinfo->setNoMtsConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_sbt_constraint_flag"); cinfo->setNoSbtConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_affine_motion_constraint_flag"); cinfo->setNoAffineMotionConstraintFlag(symbol > 0 ? true : false); - READ_FLAG(symbol, "no_gbi_constraint_flag"); cinfo->setNoGbiConstraintFlag(symbol > 0 ? true : false); - READ_FLAG(symbol, "no_mh_intra_constraint_flag"); cinfo->setNoMhIntraConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_bcw_constraint_flag"); cinfo->setNoBcwConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_ibc_constraint_flag"); cinfo->setNoIbcConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_ciip_constraint_flag"); cinfo->setNoCiipConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_fpel_mmvd_constraint_flag"); cinfo->setNoFPelMmvdConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_triangle_constraint_flag"); cinfo->setNoTriangleConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_ladf_constraint_flag"); cinfo->setNoLadfConstraintFlag(symbol > 0 ? true : false); - READ_FLAG(symbol, "no_curr_pic_ref_constraint_flag"); cinfo->setNoCurrPicRefConstraintFlag(symbol > 0 ? 
true : false); + READ_FLAG(symbol, "no_transform_skip_constraint_flag"); cinfo->setNoTransformSkipConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_bdpcm_constraint_flag"); cinfo->setNoBDPCMConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_qp_delta_constraint_flag"); cinfo->setNoQpDeltaConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_dep_quant_constraint_flag"); cinfo->setNoDepQuantConstraintFlag(symbol > 0 ? true : false); READ_FLAG(symbol, "no_sign_data_hiding_constraint_flag"); cinfo->setNoSignDataHidingConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_trail_constraint_flag"); cinfo->setNoTrailConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_stsa_constraint_flag"); cinfo->setNoStsaConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_rasl_constraint_flag"); cinfo->setNoRaslConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_radl_constraint_flag"); cinfo->setNoRadlConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_idr_constraint_flag"); cinfo->setNoIdrConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_cra_constraint_flag"); cinfo->setNoCraConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_gdr_constraint_flag"); cinfo->setNoGdrConstraintFlag(symbol > 0 ? true : false); + READ_FLAG(symbol, "no_aps_constraint_flag"); cinfo->setNoApsConstraintFlag(symbol > 0 ? 
true : false); } @@ -2113,7 +3152,16 @@ void HLSyntaxReader::parseProfileTierLevel(ProfileTierLevel *ptl, int maxNumSubL parseConstraintInfo( ptl->getConstraintInfo() ); - READ_CODE(8 , symbol, "general_level_idc" ); ptl->setLevelIdc (Level::Name(symbol)); + READ_CODE( 8, symbol, "general_level_idc" ); ptl->setLevelIdc( Level::Name( symbol ) ); + + READ_CODE(8, symbol, "num_sub_profiles"); + uint8_t numSubProfiles = symbol; + ptl->setNumSubProfile( numSubProfiles ); + for (int i = 0; i < numSubProfiles; i++) + { + READ_CODE(32, symbol, "general_sub_profile_idc[i]"); ptl->setSubProfileIdc(i, symbol); + } + for (int i = 0; i < maxNumSubLayersMinus1; i++) { @@ -2132,118 +3180,17 @@ void HLSyntaxReader::parseProfileTierLevel(ProfileTierLevel *ptl, int maxNumSubL READ_CODE(8 , symbol, "sub_layer_level_idc" ); ptl->setSubLayerLevelIdc (i, Level::Name(symbol)); } } -} - - -#else -void HLSyntaxReader::parsePTL( PTL *rpcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1 ) -{ - uint32_t uiCode; - if(profilePresentFlag) - { - parseProfileTier(rpcPTL->getGeneralPTL(), false); - } - READ_CODE( 8, uiCode, "general_level_idc" ); rpcPTL->getGeneralPTL()->setLevelIdc(Level::Name(uiCode)); - - for (int i = 0; i < maxNumSubLayersMinus1; i++) - { - READ_FLAG( uiCode, "sub_layer_profile_present_flag[i]" ); rpcPTL->setSubLayerProfilePresentFlag(i, uiCode); - READ_FLAG( uiCode, "sub_layer_level_present_flag[i]" ); rpcPTL->setSubLayerLevelPresentFlag (i, uiCode); - } - - if (maxNumSubLayersMinus1 > 0) - { - for (int i = maxNumSubLayersMinus1; i < 8; i++) - { - READ_CODE(2, uiCode, "reserved_zero_2bits"); - CHECK(uiCode != 0, "Invalid code"); - } - } - - for(int i = 0; i < maxNumSubLayersMinus1; i++) + ptl->setSubLayerLevelIdc(maxNumSubLayersMinus1, ptl->getLevelIdc()); + for( int i = maxNumSubLayersMinus1 - 1; i >= 0; i-- ) { - if( rpcPTL->getSubLayerProfilePresentFlag(i) ) - { - parseProfileTier(rpcPTL->getSubLayerPTL(i), true); - } - if(rpcPTL->getSubLayerLevelPresentFlag(i)) + if( 
!ptl->getSubLayerLevelPresentFlag( i ) ) { - READ_CODE( 8, uiCode, "sub_layer_level_idc[i]" ); rpcPTL->getSubLayerPTL(i)->setLevelIdc(Level::Name(uiCode)); + ptl->setSubLayerLevelIdc( i, ptl->getSubLayerLevelIdc( i + 1 ) ); } } } -#if ENABLE_TRACING|| RExt__DECODER_DEBUG_BIT_STATISTICS -void HLSyntaxReader::parseProfileTier(ProfileTierLevel *ptl, const bool bIsSubLayer) -#define PTL_TRACE_TEXT(txt) bIsSubLayer?("sub_layer_" txt) : ("general_" txt) -#else -void HLSyntaxReader::parseProfileTier(ProfileTierLevel *ptl, const bool /*bIsSubLayer*/) -#define PTL_TRACE_TEXT(txt) txt -#endif -{ - uint32_t uiCode; - READ_CODE(2 , uiCode, PTL_TRACE_TEXT("profile_space" )); ptl->setProfileSpace(uiCode); - READ_FLAG( uiCode, PTL_TRACE_TEXT("tier_flag" )); ptl->setTierFlag (uiCode ? Level::HIGH : Level::MAIN); - READ_CODE(5 , uiCode, PTL_TRACE_TEXT("profile_idc" )); ptl->setProfileIdc (Profile::Name(uiCode)); - for(int j = 0; j < 32; j++) - { - READ_FLAG( uiCode, PTL_TRACE_TEXT("profile_compatibility_flag[][j]" )); ptl->setProfileCompatibilityFlag(j, uiCode ? 1 : 0); - } - READ_FLAG(uiCode, PTL_TRACE_TEXT("progressive_source_flag" )); ptl->setProgressiveSourceFlag(uiCode ? true : false); - - READ_FLAG(uiCode, PTL_TRACE_TEXT("interlaced_source_flag" )); ptl->setInterlacedSourceFlag(uiCode ? true : false); - - READ_FLAG(uiCode, PTL_TRACE_TEXT("non_packed_constraint_flag" )); ptl->setNonPackedConstraintFlag(uiCode ? true : false); - - READ_FLAG(uiCode, PTL_TRACE_TEXT("frame_only_constraint_flag" )); ptl->setFrameOnlyConstraintFlag(uiCode ? 
true : false); - - if (ptl->getProfileIdc() == Profile::MAINREXT || ptl->getProfileCompatibilityFlag(Profile::MAINREXT) || - ptl->getProfileIdc() == Profile::HIGHTHROUGHPUTREXT || ptl->getProfileCompatibilityFlag(Profile::HIGHTHROUGHPUTREXT)) - { - uint32_t maxBitDepth=16; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_12bit_constraint_flag" )); if (uiCode) maxBitDepth=12; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_10bit_constraint_flag" )); if (uiCode) maxBitDepth=10; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_8bit_constraint_flag" )); if (uiCode) maxBitDepth=8; - ptl->setBitDepthConstraint(maxBitDepth); - ChromaFormat chromaFmtConstraint=CHROMA_444; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_422chroma_constraint_flag" )); if (uiCode) chromaFmtConstraint=CHROMA_422; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_420chroma_constraint_flag" )); if (uiCode) chromaFmtConstraint=CHROMA_420; - READ_FLAG( uiCode, PTL_TRACE_TEXT("max_monochrome_constraint_flag" )); if (uiCode) chromaFmtConstraint=CHROMA_400; - ptl->setChromaFormatConstraint(chromaFmtConstraint); - READ_FLAG( uiCode, PTL_TRACE_TEXT("intra_constraint_flag" )); ptl->setIntraConstraintFlag(uiCode != 0); - READ_FLAG( uiCode, PTL_TRACE_TEXT("one_picture_only_constraint_flag")); ptl->setOnePictureOnlyConstraintFlag(uiCode != 0); - READ_FLAG( uiCode, PTL_TRACE_TEXT("lower_bit_rate_constraint_flag" )); ptl->setLowerBitRateConstraintFlag(uiCode != 0); - READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[0..15]" )); - READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[16..31]" )); - READ_CODE(2, uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[32..33]" )); - } - else - { - ptl->setBitDepthConstraint( ( ptl->getProfileIdc() == Profile::MAIN10 || ptl->getProfileIdc() == Profile::NEXT ) ? 
10 : 8 ); - ptl->setChromaFormatConstraint(CHROMA_420); - ptl->setIntraConstraintFlag(false); - ptl->setLowerBitRateConstraintFlag(true); - READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[0..15]" )); - READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[16..31]" )); - READ_CODE(11, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[32..42]" )); - } - if ((ptl->getProfileIdc() >= Profile::MAIN && ptl->getProfileIdc() <= Profile::HIGHTHROUGHPUTREXT) || - ptl->getProfileCompatibilityFlag(Profile::MAIN) || - ptl->getProfileCompatibilityFlag(Profile::MAIN10) || - ptl->getProfileCompatibilityFlag(Profile::MAINSTILLPICTURE) || - ptl->getProfileCompatibilityFlag(Profile::MAINREXT) || - ptl->getProfileCompatibilityFlag(Profile::HIGHTHROUGHPUTREXT) ) - { - READ_FLAG( uiCode, PTL_TRACE_TEXT("inbld_flag" )); CHECK(uiCode != 0, "Invalid code"); - } - else - { - READ_FLAG( uiCode, PTL_TRACE_TEXT("reserved_zero_bit" )); - } -#undef PTL_TRACE_TEXT -} -#endif void HLSyntaxReader::parseTerminatingBit( uint32_t& ruiBit ) { @@ -2406,57 +3353,49 @@ void HLSyntaxReader::parsePredWeightTable( Slice* pcSlice, const SPS *sps ) CHECK(uiTotalSignalledWeightFlags>24, "Too many weight flag signalled"); } -#if HEVC_USE_SCALING_LISTS /** decode quantization matrix * \param scalingList quantization matrix information */ void HLSyntaxReader::parseScalingList(ScalingList* scalingList) { - uint32_t code, sizeId, listId; - bool scalingListPredModeFlag; - //for each size - for(sizeId = SCALING_LIST_FIRST_CODED; sizeId <= SCALING_LIST_LAST_CODED; sizeId++) + uint32_t code; + bool scalingListCopyModeFlag; + READ_FLAG(code, "scaling_matrix_for_lfnst_disabled_flag"); scalingList->setDisableScalingMatrixForLfnstBlks(code ? 
true : false); + for (int scalingListId = 0; scalingListId < 28; scalingListId++) { - for(listId = 0; listId < SCALING_LIST_NUM; listId++) - { - if ((sizeId==SCALING_LIST_32x32) && (listId%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) != 0)) - { - int *src = scalingList->getScalingListAddress(sizeId, listId); - const int size = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]); - const int *srcNextSmallerSize = scalingList->getScalingListAddress(sizeId-1, listId); - for(int i=0; i<size; i++) - { - src[i] = srcNextSmallerSize[i]; - } - scalingList->setScalingListDC(sizeId,listId,(sizeId > SCALING_LIST_8x8) ? scalingList->getScalingListDC(sizeId-1, listId) : src[0]); - } - else - { - READ_FLAG( code, "scaling_list_pred_mode_flag"); - scalingListPredModeFlag = (code) ? true : false; - scalingList->setScalingListPredModeFlag(sizeId, listId, scalingListPredModeFlag); - if(!scalingListPredModeFlag) //Copy Mode - { - READ_UVLC( code, "scaling_list_pred_matrix_id_delta"); - - if (sizeId==SCALING_LIST_32x32) - { - code*=(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES); // Adjust the decoded code for this size, to cope with the missing 32x32 chroma entries. - } + READ_FLAG(code, "scaling_list_copy_mode_flag"); + scalingListCopyModeFlag = (code) ? true : false; + scalingList->setScalingListCopyModeFlag(scalingListId, scalingListCopyModeFlag); - scalingList->setRefMatrixId (sizeId,listId,(uint32_t)((int)(listId)-(code))); - if( sizeId > SCALING_LIST_8x8 ) - { - scalingList->setScalingListDC(sizeId,listId,((listId == scalingList->getRefMatrixId (sizeId,listId))? 
16 :scalingList->getScalingListDC(sizeId, scalingList->getRefMatrixId (sizeId,listId)))); - } - scalingList->processRefMatrix( sizeId, listId, scalingList->getRefMatrixId (sizeId,listId)); + scalingList->setScalingListPreditorModeFlag(scalingListId, false); + if (!scalingListCopyModeFlag) + { + READ_FLAG(code, "scaling_list_predictor_mode_flag"); + scalingList->setScalingListPreditorModeFlag(scalingListId, code); + } - } - else //DPCM Mode - { - decodeScalingList(scalingList, sizeId, listId); - } + if ((scalingListCopyModeFlag || scalingList->getScalingListPreditorModeFlag(scalingListId)) && scalingListId!= SCALING_LIST_1D_START_2x2 && scalingListId!= SCALING_LIST_1D_START_4x4 && scalingListId!= SCALING_LIST_1D_START_8x8) //Copy Mode + { + READ_UVLC(code, "scaling_list_pred_matrix_id_delta"); + scalingList->setRefMatrixId(scalingListId, (uint32_t)((int)(scalingListId)-(code))); + } + else if (scalingListCopyModeFlag || scalingList->getScalingListPreditorModeFlag(scalingListId)) + { + scalingList->setRefMatrixId(scalingListId, (uint32_t)((int)(scalingListId))); + } + if (scalingListCopyModeFlag)//copy + { + if (scalingListId >= SCALING_LIST_1D_START_16x16) + { + scalingList->setScalingListDC(scalingListId, + ((scalingListId == scalingList->getRefMatrixId(scalingListId)) ? 16 + : (scalingList->getRefMatrixId(scalingListId) < SCALING_LIST_1D_START_16x16) ? 
scalingList->getScalingListAddress(scalingList->getRefMatrixId(scalingListId))[0] : scalingList->getScalingListDC(scalingList->getRefMatrixId(scalingListId)))); } + scalingList->processRefMatrix(scalingListId, scalingList->getRefMatrixId(scalingListId)); + } + else + { + decodeScalingList(scalingList, scalingListId, scalingList->getScalingListPreditorModeFlag(scalingListId)); } } @@ -2468,30 +3407,47 @@ void HLSyntaxReader::parseScalingList(ScalingList* scalingList) * \param sizeId size index * \param listId list index */ -void HLSyntaxReader::decodeScalingList(ScalingList *scalingList, uint32_t sizeId, uint32_t listId) +void HLSyntaxReader::decodeScalingList(ScalingList *scalingList, uint32_t scalingListId, bool isPredictor) { - int i,coefNum = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]); + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; + int i, coefNum = matrixSize * matrixSize; int data; int scalingListDcCoefMinus8 = 0; - int nextCoef = SCALING_LIST_START_VALUE; - uint32_t* scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )]; - int *dst = scalingList->getScalingListAddress(sizeId, listId); + int nextCoef = (isPredictor) ? 0 : SCALING_LIST_START_VALUE; + ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)]; + int *dst = scalingList->getScalingListAddress(scalingListId); + + int PredListId = scalingList->getRefMatrixId(scalingListId); + CHECK(isPredictor && PredListId > scalingListId, "Scaling List error predictor!"); + const int *srcPred = (isPredictor) ? ((scalingListId == PredListId) ? 
scalingList->getScalingListDefaultAddress(scalingListId) : scalingList->getScalingListAddress(PredListId)) : NULL; + if(isPredictor && scalingListId == PredListId) + scalingList->setScalingListDC(PredListId, SCALING_LIST_DC); + int predCoef = 0; - if( sizeId > SCALING_LIST_8x8 ) + if (scalingListId >= SCALING_LIST_1D_START_16x16) { - READ_SVLC( scalingListDcCoefMinus8, "scaling_list_dc_coef_minus8"); - scalingList->setScalingListDC(sizeId,listId,scalingListDcCoefMinus8 + 8); - nextCoef = scalingList->getScalingListDC(sizeId,listId); + READ_SVLC(scalingListDcCoefMinus8, "scaling_list_dc_coef_minus8"); + nextCoef += scalingListDcCoefMinus8; + if (isPredictor) + { + predCoef = (PredListId >= SCALING_LIST_1D_START_16x16) ? scalingList->getScalingListDC(PredListId) : srcPred[0]; + } + scalingList->setScalingListDC(scalingListId, (nextCoef + predCoef + 256) & 255); } for(i = 0; i < coefNum; i++) { + if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4) + { + dst[scan[i].idx] = 0; + continue; + } READ_SVLC( data, "scaling_list_delta_coef"); - nextCoef = (nextCoef + data + 256 ) % 256; - dst[scan[i]] = nextCoef; + nextCoef += data; + predCoef = (isPredictor) ? 
srcPred[scan[i].idx] : 0; + dst[scan[i].idx] = (nextCoef + predCoef + 256) & 255; } } -#endif bool HLSyntaxReader::xMoreRbspData() { @@ -2522,183 +3478,91 @@ bool HLSyntaxReader::xMoreRbspData() return (cnt>0); } - -int HLSyntaxReader::alfGolombDecode( const int k ) +int HLSyntaxReader::alfGolombDecode( const int k, const bool signed_val ) { - uint32_t uiSymbol; - int q = -1; - int nr = 0; - int m = (int)pow( 2.0, k ); - int a; - - uiSymbol = 1; - while( uiSymbol ) + int numLeadingBits = -1; + uint32_t b = 0; + for (; !b; numLeadingBits++) { #if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadFlag( uiSymbol, "" ); + xReadFlag( b, "" ); #else - xReadFlag( uiSymbol ); + READ_FLAG( b, "alf_coeff_abs_prefix"); #endif - q++; } - for( a = 0; a < k; ++a ) // read out the sequential log2(M) bits + int symbol = ( ( 1 << numLeadingBits ) - 1 ) << k; + if ( numLeadingBits + k > 0) { -#if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadFlag( uiSymbol, "" ); -#else - xReadFlag( uiSymbol ); -#endif - if( uiSymbol ) - { - nr += 1 << a; - } + uint32_t bins; + READ_CODE( numLeadingBits + k, bins, "alf_coeff_abs_suffix" ); + symbol += bins; } - nr += q * m; // add the bits and the multiple of M - if( nr != 0 ) + + if ( signed_val && symbol != 0 ) { #if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadFlag( uiSymbol, "" ); + xReadFlag( b, "" ); #else - xReadFlag( uiSymbol ); + READ_FLAG( b, "alf_coeff_sign" ); #endif - nr = ( uiSymbol ) ? nr : -nr; + symbol = ( b ) ? 
-symbol : symbol; } - return nr; + return symbol; } -void HLSyntaxReader::alfFilter( AlfSliceParam& alfSliceParam, const bool isChroma ) +void HLSyntaxReader::alfFilter( AlfParam& alfParam, const bool isChroma, const int altIdx ) { uint32_t code; - if( !isChroma ) - { - READ_FLAG( code, "alf_luma_coeff_delta_flag" ); - alfSliceParam.alfLumaCoeffDeltaFlag = code; - - if( !alfSliceParam.alfLumaCoeffDeltaFlag ) - { - std::memset( alfSliceParam.alfLumaCoeffFlag, true, sizeof( alfSliceParam.alfLumaCoeffFlag ) ); - - if( alfSliceParam.numLumaFilters > 1 ) - { - READ_FLAG( code, "alf_luma_coeff_delta_prediction_flag" ); - alfSliceParam.alfLumaCoeffDeltaPredictionFlag = code; - } - else - { - alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0; - } - } - else - { - alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0; - } - } // derive maxGolombIdx AlfFilterShape alfShape( isChroma ? 5 : 7 ); - const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType ); - READ_UVLC( code, isChroma ? "alf_chroma_min_eg_order_minus1" : "alf_luma_min_eg_order_minus1" ); - - int kMin = code + 1; - static int kMinTab[MAX_NUM_ALF_COEFF]; - const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters; - short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff; - - for( int idx = 0; idx < maxGolombIdx; idx++ ) - { - READ_FLAG( code, isChroma ? "alf_chroma_eg_order_increase_flag" : "alf_luma_eg_order_increase_flag" ); - CHECK( code > 1, "Wrong golomb_order_increase_flag" ); - kMinTab[idx] = kMin + code; - kMin = kMinTab[idx]; - } + const int numFilters = isChroma ? 1 : alfParam.numLumaFilters; + short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff; + short* clipp = isChroma ? 
alfParam.chromaClipp[altIdx] : alfParam.lumaClipp; - if( !isChroma ) - { - if( alfSliceParam.alfLumaCoeffDeltaFlag ) - { - for( int ind = 0; ind < alfSliceParam.numLumaFilters; ++ind ) - { - READ_FLAG( code, "alf_luma_coeff_flag[i]" ); - alfSliceParam.alfLumaCoeffFlag[ind] = code; - } - } - } // Filter coefficients for( int ind = 0; ind < numFilters; ++ind ) { - if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag ) - { - memset( coeff + ind * MAX_NUM_ALF_LUMA_COEFF, 0, sizeof( *coeff ) * alfShape.numCoeff ); - continue; - } for( int i = 0; i < alfShape.numCoeff - 1; i++ ) { - coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( kMinTab[alfShape.golombIdx[i]] ); + coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( 3 ); + CHECK( isChroma && + ( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] > 127 || coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] < -127 ) + , "AlfCoeffC shall be in the range of -127 to 127, inclusive" ); } } -} -int HLSyntaxReader::truncatedUnaryEqProb( const int maxSymbol ) -{ - for( int k = 0; k < maxSymbol; k++ ) - { - uint32_t symbol; -#if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadFlag( symbol, "" ); + // Clipping values coding +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + if ( alfParam.nonLinearFlag[isChroma] ) #else - xReadFlag( symbol ); + if ( alfParam.nonLinearFlag[isChroma][altIdx] ) #endif + { - if( !symbol ) + // Filter coefficients + for( int ind = 0; ind < numFilters; ++ind ) { - return k; - } - } - return maxSymbol; -} -void HLSyntaxReader::xReadTruncBinCode( uint32_t& ruiSymbol, const int uiMaxSymbol ) -{ - int uiThresh; - if( uiMaxSymbol > 256 ) - { - int uiThreshVal = 1 << 8; - uiThresh = 8; - while( uiThreshVal <= uiMaxSymbol ) - { - uiThresh++; - uiThreshVal <<= 1; + for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + { + READ_CODE(2, code, "alf_clipping_index"); + clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = code; + } } - uiThresh--; } else { - uiThresh = g_tbMax[uiMaxSymbol]; - } - - int uiVal = 1 << 
uiThresh; - int b = uiMaxSymbol - uiVal; -#if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadCode( uiThresh, ruiSymbol, "" ); -#else - xReadCode( uiThresh, ruiSymbol ); -#endif - if( ruiSymbol >= uiVal - b ) - { - uint32_t uiSymbol; -#if RExt__DECODER_DEBUG_BIT_STATISTICS - xReadFlag( uiSymbol, "" ); -#else - xReadFlag( uiSymbol ); -#endif - ruiSymbol <<= 1; - ruiSymbol += uiSymbol; - ruiSymbol -= ( uiVal - b ); + for( int ind = 0; ind < numFilters; ++ind ) + { + std::fill_n( clipp + ind * MAX_NUM_ALF_LUMA_COEFF, alfShape.numCoeff, 0 ); + } } } + //! \} diff --git a/source/Lib/DecoderLib/VLCReader.h b/source/Lib/DecoderLib/VLCReader.h index 01117b9f861d9c1892180848f638c93bf79b6178..69e3f479ca770b34ac1efdb3ebb54d0bb22d7ad3 100644 --- a/source/Lib/DecoderLib/VLCReader.h +++ b/source/Lib/DecoderLib/VLCReader.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -46,6 +46,7 @@ #if ENABLE_TRACING +#define READ_SCODE(length, code, name) xReadSCode ( length, code, name ) #define READ_CODE(length, code, name) xReadCodeTr ( length, code, name ) #define READ_UVLC( code, name) xReadUvlcTr ( code, name ) #define READ_SVLC( code, name) xReadSvlcTr ( code, name ) @@ -55,6 +56,7 @@ #if RExt__DECODER_DEBUG_BIT_STATISTICS +#define READ_SCODE(length, code, name) xReadSCode( length, code, name ) #define READ_CODE(length, code, name) xReadCode ( length, code, name ) #define READ_UVLC( code, name) xReadUvlc ( code, name ) #define READ_SVLC( code, name) xReadSvlc ( code, name ) @@ -62,6 +64,7 @@ #else +#define READ_SCODE(length, code, name) xReadSCode ( length, code ) #define READ_CODE(length, code, name) xReadCode ( length, code ) #define READ_UVLC( code, name) xReadUvlc ( code ) #define READ_SVLC( code, name) xReadSvlc ( code ) @@ -103,15 +106,19 @@ protected: void xReadSvlcTr ( int& rValue, const char *pSymbolName ); void xReadFlagTr ( uint32_t& rValue, const char *pSymbolName ); #endif +#if RExt__DECODER_DEBUG_BIT_STATISTICS || ENABLE_TRACING + void xReadSCode ( uint32_t length, int& val, const char *pSymbolName ); +#else + void xReadSCode ( uint32_t length, int& val ); +#endif + public: void setBitstream ( InputBitstream* p ) { m_pcBitstream = p; } InputBitstream* getBitstream() { return m_pcBitstream; } protected: void xReadRbspTrailingBits(); -#if JVET_M0101_HLS bool isByteAligned() { return (m_pcBitstream->getNumBitsUntilByteAligned() == 0 ); } -#endif }; @@ -143,41 +150,37 @@ public: virtual ~HLSyntaxReader(); protected: - void parseShortTermRefPicSet (SPS* pcSPS, ReferencePictureSet* pcRPS, int idx); + void copyRefPicList(SPS* pcSPS, ReferencePictureList* source_rpl, ReferencePictureList* dest_rpl); + void parseRefPicList(SPS* pcSPS, ReferencePictureList* rpl); public: void setBitstream ( InputBitstream* p ) { m_pcBitstream = p; } -#if HEVC_VPS void parseVPS 
( VPS* pcVPS ); -#endif + void parseDPS ( DPS* dps ); void parseSPS ( SPS* pcSPS ); - void parsePPS ( PPS* pcPPS ); - void parseAPS ( APS* pcAPS); + void parsePPS ( PPS* pcPPS, ParameterSetManager *parameterSetManager ); + void parseAPS ( APS* pcAPS ); + void parseAlfAps ( APS* pcAPS ); + void parseLmcsAps ( APS* pcAPS ); + void parseScalingListAps ( APS* pcAPS ); void parseVUI ( VUI* pcVUI, SPS* pcSPS ); -#if !JVET_M0101_HLS - void parsePTL ( PTL *rpcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1 ); - void parseProfileTier ( ProfileTierLevel *ptl, const bool bIsSubLayer ); -#else void parseConstraintInfo (ConstraintInfo *cinfo); void parseProfileTierLevel ( ProfileTierLevel *ptl, int maxNumSubLayersMinus1); -#endif - void parseHrdParameters ( HRD *hrd, bool cprms_present_flag, uint32_t tempLevelHigh ); - void parseSliceHeader ( Slice* pcSlice, ParameterSetManager *parameterSetManager, const int prevTid0POC ); + void parseHrdParameters ( HRDParameters *hrd, uint32_t firstSubLayer, uint32_t tempLevelHigh ); + void parsePictureHeader ( PicHeader* picHeader, ParameterSetManager *parameterSetManager ); + void parseSliceHeader ( Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC ); + void parseSliceHeaderToPoc ( Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC ); void parseTerminatingBit ( uint32_t& ruiBit ); void parseRemainingBytes ( bool noTrailingBytesExpected ); void parsePredWeightTable( Slice* pcSlice, const SPS *sps ); -#if HEVC_USE_SCALING_LISTS void parseScalingList ( ScalingList* scalingList ); - void decodeScalingList ( ScalingList *scalingList, uint32_t sizeId, uint32_t listId); -#endif + void decodeScalingList ( ScalingList *scalingList, uint32_t scalingListId, bool isPredictor); void parseReshaper ( SliceReshapeInfo& sliceReshaperInfo, const SPS* pcSPS, const bool isIntra ); - void alfFilter( AlfSliceParam& alfSliceParam, const bool isChroma ); 
+ void alfFilter( AlfParam& alfParam, const bool isChroma, const int altIdx ); private: - int truncatedUnaryEqProb( const int maxSymbol ); - void xReadTruncBinCode( uint32_t& ruiSymbol, const int uiMaxSymbol ); - int alfGolombDecode( const int k ); + int alfGolombDecode( const int k, const bool signed_val=true ); protected: bool xMoreRbspData(); diff --git a/source/Lib/EncoderLib/AQp.cpp b/source/Lib/EncoderLib/AQp.cpp index bb6effc838cb7f97af26a933aff6bed0f7d5be7e..a157e1c854416e142fb11b39f1b5c27472732f29 100644 --- a/source/Lib/EncoderLib/AQp.cpp +++ b/source/Lib/EncoderLib/AQp.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/EncoderLib/AQp.h b/source/Lib/EncoderLib/AQp.h index b64d66a7dfe7b40bea3d2b63d87f5fafd8275e02..4e62ed1eee3034cd005c2e036041e12e59cebfdd 100644 --- a/source/Lib/EncoderLib/AQp.h +++ b/source/Lib/EncoderLib/AQp.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h index 086c834bf6c9bcd3a4306d526c5e40c2a5c904fe..c4faaad1c5eb3aed1bfdd79e9ab131332cc11bb5 100644 --- a/source/Lib/EncoderLib/Analyze.h +++ b/source/Lib/EncoderLib/Analyze.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -69,15 +69,21 @@ private: uint32_t m_uiNumPic; double m_dFrmRate; //--CFG_KDY double m_MSEyuvframe[MAX_NUM_COMPONENT]; // sum of MSEs + double m_upscaledPSNR[MAX_NUM_COMPONENT]; #if EXTENSION_360_VIDEO TExt360EncAnalyze m_ext360; #endif +#if JVET_O0756_CALCULATE_HDRMETRICS + double m_logDeltaESum[hdrtoolslib::NB_REF_WHITE]; + double m_psnrLSum[hdrtoolslib::NB_REF_WHITE]; +#endif public: virtual ~Analyze() {} Analyze() { clear(); } void addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT] + , const double upscaledPSNR[MAX_NUM_COMPONENT] , bool isEncodeLtRef ) { @@ -88,6 +94,7 @@ public: { m_dPSNRSum[i] += psnr[i]; m_MSEyuvframe[i] += MSEyuvframe[i]; + m_upscaledPSNR[i] += upscaledPSNR[i]; } m_uiNumPic++; @@ -96,12 +103,26 @@ public: double getWPSNR (const ComponentID compID) const { return m_dPSNRSum[compID] / (double)m_uiNumPic; } #endif double getPsnr(ComponentID compID) const { return m_dPSNRSum[compID]; } +#if JVET_O0756_CALCULATE_HDRMETRICS + double getDeltaE() const { return m_logDeltaESum[0]; } + double getPsnrL() const { return m_psnrLSum[0]; } +#endif double getBits() const { return m_dAddBits; } void setBits(double numBits) { m_dAddBits = numBits; } uint32_t getNumPic() const { return m_uiNumPic; } #if EXTENSION_360_VIDEO TExt360EncAnalyze& getExt360Info() { return m_ext360; } #endif +#if JVET_O0756_CALCULATE_HDRMETRICS + void addHDRMetricsResult(double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]) + { + for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++) + { + m_logDeltaESum[i] += deltaE[i]; + m_psnrLSum[i] += psnrL[i]; + } + } +#endif void setFrmRate (double dFrameRate) { m_dFrmRate = dFrameRate; } //--CFG_KDY void clear() @@ -111,10 +132,18 @@ public: { m_dPSNRSum[i] = 0; m_MSEyuvframe[i] = 0; + m_upscaledPSNR[i] = 0; } m_uiNumPic = 0; #if EXTENSION_360_VIDEO m_ext360.clear(); +#endif +#if 
JVET_O0756_CALCULATE_HDRMETRICS + for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++) + { + m_logDeltaESum[i] = 0.0; + m_psnrLSum[i] = 0.0; + } #endif } @@ -159,9 +188,17 @@ public: } #if ENABLE_QPA || WCG_WPSNR - void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const bool useWPSNR = false ) + void printOut( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths, const bool useWPSNR = false +#if JVET_O0756_CALCULATE_HDRMETRICS + , const bool printHdrMetrics = false +#endif + ) #else - void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths ) + void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths +#if JVET_O0756_CALCULATE_HDRMETRICS + , const bool printHdrMetrics = false +#endif + ) #endif { #if !WCG_WPSNR @@ -405,10 +442,16 @@ public: { #if ENABLE_QPA || WCG_WPSNR if (useWPSNR) { - msg( e_msg_level, "\tTotal Frames | " "Bitrate " "Y-WPSNR " "U-WPSNR " "V-WPSNR " "YUV-WPSNR" ); + msg( e_msg_level, "\tTotal Frames | " "Bitrate " "Y-WPSNR " "U-WPSNR " "V-WPSNR " "YUV-WPSNR " ); } else #endif - msg( e_msg_level, "\tTotal Frames | " "Bitrate " "Y-PSNR " "U-PSNR " "V-PSNR " "YUV-PSNR " ); + msg( e_msg_level, "\tTotal Frames | " "Bitrate " "Y-PSNR " "U-PSNR " "V-PSNR " "YUV-PSNR " ); +#if JVET_O0756_CALCULATE_HDRMETRICS + if (printHdrMetrics) + { + msg(e_msg_level, "DeltaE " "PSNRL "); + } +#endif #if EXTENSION_360_VIDEO m_ext360.printHeader(e_msg_level); #endif @@ -417,7 +460,12 @@ public: { msg(e_msg_level, "xY-PSNR " "xU-PSNR " "xV-PSNR "); } - +#if JVET_O0756_CALCULATE_HDRMETRICS + if (printHdrMetrics && printHexPsnr) + { + 
msg(e_msg_level, "xDeltaE " "xPSNRL "); + } +#endif if (printSequenceMSE) { msg( e_msg_level, " Y-MSE " "U-MSE " "V-MSE " "YUV-MSE \n" ); @@ -442,8 +490,14 @@ public: #if ENABLE_QPA useWPSNR ? getWPSNR(COMPONENT_Cr) : #endif - getPsnr(COMPONENT_Cr) / (double)getNumPic(), - PSNRyuv ); + getPsnr(COMPONENT_Cr) / (double)getNumPic(), + PSNRyuv ); +#if JVET_O0756_CALCULATE_HDRMETRICS + if (printHdrMetrics) + { + msg( e_msg_level, " %8.4lf " "%8.4lf ", getDeltaE()/(double)getNumPic(), getPsnrL()/(double)getNumPic()); + } +#endif #if EXTENSION_360_VIDEO m_ext360.printPSNRs(getNumPic(), e_msg_level); @@ -463,7 +517,33 @@ public: } msg(e_msg_level, " %16" PRIx64 " %16" PRIx64 " %16" PRIx64 , xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], xPsnr[COMPONENT_Cr]); } +#if JVET_O0756_CALCULATE_HDRMETRICS + if (printHexPsnr && printHdrMetrics) + { + double dDeltaE[MAX_NUM_COMPONENT]; + uint64_t xDeltaE[MAX_NUM_COMPONENT]; + for (int i = 0; i < 1; i++) + { + dDeltaE[i] = getDeltaE() / (double)getNumPic(); + copy(reinterpret_cast<uint8_t *>(&dDeltaE[i]), + reinterpret_cast<uint8_t *>(&dDeltaE[i]) + sizeof(dDeltaE[i]), + reinterpret_cast<uint8_t *>(&xDeltaE[i])); + } + + double dPsnrL[MAX_NUM_COMPONENT]; + uint64_t xPsnrL[MAX_NUM_COMPONENT]; + for (int i = 0; i < 1; i++) + { + dPsnrL[i] = getPsnrL() / (double)getNumPic(); + + copy(reinterpret_cast<uint8_t *>(&dPsnrL[i]), + reinterpret_cast<uint8_t *>(&dPsnrL[i]) + sizeof(dPsnrL[i]), + reinterpret_cast<uint8_t *>(&xPsnrL[i])); + } + msg(e_msg_level, " %16" PRIx64 " %16" PRIx64 , xDeltaE[0], xPsnrL[0]); + } +#endif if (printSequenceMSE) { msg( e_msg_level, " %8.4lf " "%8.4lf " "%8.4lf " "%8.4lf\n", @@ -476,6 +556,34 @@ public: { msg( e_msg_level, "\n"); } + if( printRprPSNR ) + { + double psnr[MAX_NUM_COMPONENT]; + for( uint32_t componentIndex = 0; componentIndex < MAX_NUM_COMPONENT; componentIndex++ ) + { + const ComponentID compID = ComponentID( componentIndex ); + + if( getNumPic() == 0 ) + { + psnr[compID] = 0.0; + } + else + { + const 
uint32_t maxval = 255 << ( bitDepths.recon[toChannelType( compID )] - 8 ); + psnr[compID] = ( m_MSEyuvframe[compID] == 0 ) ? 999.99 : 10.0 * log10( ( maxval * maxval ) / ( m_MSEyuvframe[compID] / (double)getNumPic() ) ); + } + } + + msg( e_msg_level, "\nPSNR1 Y-PSNR " "U-PSNR " "V-PSNR\n" ); + msg( e_msg_level, " %8.4lf " " %8.4lf " " %8.4lf\n", + psnr[COMPONENT_Y], psnr[COMPONENT_Cb], psnr[COMPONENT_Cr] ); + + msg( e_msg_level, "PSNR2 Y-PSNR " "U-PSNR " "V-PSNR\n" ); + msg( e_msg_level, " %8.4lf " " %8.4lf " " %8.4lf\n", + m_upscaledPSNR[COMPONENT_Y] / (double)getNumPic(), + m_upscaledPSNR[COMPONENT_Cb] / (double)getNumPic(), + m_upscaledPSNR[COMPONENT_Cr] / (double)getNumPic()); + } } } break; diff --git a/source/Lib/EncoderLib/AnnexBwrite.h b/source/Lib/EncoderLib/AnnexBwrite.h index b2686d47389ab30da4d2fdf6b522ac3ef2a336a5..37f967684409f3fc59c8b18e3dfeaa99c860eaab 100644 --- a/source/Lib/EncoderLib/AnnexBwrite.h +++ b/source/Lib/EncoderLib/AnnexBwrite.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -59,11 +59,7 @@ static std::vector<uint32_t> writeAnnexB(std::ostream& out, const AccessUnit& au uint32_t size = 0; /* size of annexB unit in bytes */ static const uint8_t start_code_prefix[] = {0,0,0,1}; -#if HEVC_VPS - if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_PPS) -#else - if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_PPS) -#endif + if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_DPS || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_PPS) { /* From AVC, When any of the following conditions are fulfilled, the * zero_byte syntax element shall be present: diff --git a/source/Lib/EncoderLib/BinEncoder.cpp b/source/Lib/EncoderLib/BinEncoder.cpp index ebad19d3e1a517e3a4cb087e59b9f29abc117329..2d94f7765b9e05b77433864d532126729e987c18 100644 --- a/source/Lib/EncoderLib/BinEncoder.cpp +++ b/source/Lib/EncoderLib/BinEncoder.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -205,56 +205,41 @@ void BinEncoderBase::encodeBinsEP( unsigned bins, unsigned numBins ) } } -void BinEncoderBase::encodeRemAbsEP( unsigned bins, unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange ) +void BinEncoderBase::encodeRemAbsEP(unsigned bins, unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange) { - const unsigned threshold = COEF_REMAIN_BIN_REDUCTION << goRicePar; - useLimitedPrefixLength = true; - if( bins < threshold ) + const unsigned threshold = cutoff << goRicePar; + if (bins < threshold) { - const unsigned bitMask = ( 1 << goRicePar ) - 1; - const unsigned length = ( bins >> goRicePar ) + 1; - encodeBinsEP( ( 1 << length ) - 2, length ); - encodeBinsEP( bins & bitMask, goRicePar); + const unsigned bitMask = (1 << goRicePar) - 1; + const unsigned length = (bins >> goRicePar) + 1; + encodeBinsEP((1 << length) - 2, length); + encodeBinsEP(bins & bitMask, goRicePar); } - else if (useLimitedPrefixLength) + else { - const unsigned maxPrefixLength = 32 - COEF_REMAIN_BIN_REDUCTION - maxLog2TrDynamicRange; - unsigned prefixLength = 0; - unsigned codeValue = ( bins >> goRicePar ) - COEF_REMAIN_BIN_REDUCTION; + const unsigned maxPrefixLength = 32 - cutoff - maxLog2TrDynamicRange; + unsigned prefixLength = 0; + unsigned codeValue = (bins >> goRicePar) - cutoff; unsigned suffixLength; - if( codeValue >= ( ( 1 << maxPrefixLength ) - 1 ) ) + if (codeValue >= ((1 << maxPrefixLength) - 1)) { prefixLength = maxPrefixLength; suffixLength = maxLog2TrDynamicRange; } else { - while( codeValue > ( ( 2 << prefixLength ) - 2 ) ) + while (codeValue > ((2 << prefixLength) - 2)) { prefixLength++; } suffixLength = prefixLength + goRicePar + 1; //+1 for the separator bit } - const unsigned totalPrefixLength = prefixLength + COEF_REMAIN_BIN_REDUCTION; - const unsigned bitMask = ( 1 << goRicePar ) - 1; - const unsigned prefix = ( 1 << totalPrefixLength ) - 1; - const unsigned 
suffix = ( ( codeValue - ( (1 << prefixLength ) - 1 ) ) << goRicePar ) | ( bins & bitMask ); - encodeBinsEP( prefix, totalPrefixLength ); //prefix - encodeBinsEP( suffix, suffixLength ); //separator, suffix, and rParam bits - } - else - { - unsigned length = goRicePar; - unsigned delta = 1 << length; - bins -= threshold; - while (bins >= delta ) - { - bins -= delta; - delta = 1 << (++length); - } - unsigned numBin = COEF_REMAIN_BIN_REDUCTION + length + 1 - goRicePar; - encodeBinsEP( ( 1 << numBin ) - 2, numBin ); - encodeBinsEP( bins, length ); + const unsigned totalPrefixLength = prefixLength + cutoff; + const unsigned bitMask = (1 << goRicePar) - 1; + const unsigned prefix = (1 << totalPrefixLength) - 1; + const unsigned suffix = ((codeValue - ((1 << prefixLength) - 1)) << goRicePar) | (bins & bitMask); + encodeBinsEP(prefix, totalPrefixLength); //prefix + encodeBinsEP(suffix, suffixLength); //separator, suffix, and rParam bits } } @@ -285,22 +270,12 @@ void BinEncoderBase::encodeBinTrm( unsigned bin ) } } -void BinEncoderBase::encodeBinsPCM( unsigned bins, unsigned numBins ) -{ - m_Bitstream->write( bins, numBins ); -} void BinEncoderBase::align() { m_Range = 256; } -void BinEncoderBase::pcmAlignBits() -{ - finish(); - m_Bitstream->write( 1, 1 ); - m_Bitstream->writeAlignZero(); // pcm align zero -} void BinEncoderBase::encodeAlignedBinsEP( unsigned bins, unsigned numBins ) { @@ -437,46 +412,33 @@ BitEstimatorBase::BitEstimatorBase( const BinProbModel* dummy ) m_EstFracBits = 0; } -void BitEstimatorBase::encodeRemAbsEP( unsigned bins, unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange ) +void BitEstimatorBase::encodeRemAbsEP(unsigned bins, unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange) { - const unsigned threshold = COEF_REMAIN_BIN_REDUCTION << goRicePar; - useLimitedPrefixLength = true; - if( bins < threshold ) + const unsigned threshold = cutoff << goRicePar; + if (bins < threshold) { - m_EstFracBits += 
BinProbModelBase::estFracBitsEP( ( bins >> goRicePar ) + 1 + goRicePar ); + m_EstFracBits += BinProbModelBase::estFracBitsEP((bins >> goRicePar) + 1 + goRicePar); } - else if (useLimitedPrefixLength) + else { - const unsigned maxPrefixLength = 32 - COEF_REMAIN_BIN_REDUCTION - maxLog2TrDynamicRange; - unsigned prefixLength = 0; - unsigned codeValue = ( bins >> goRicePar ) - COEF_REMAIN_BIN_REDUCTION; + const unsigned maxPrefixLength = 32 - cutoff - maxLog2TrDynamicRange; + unsigned prefixLength = 0; + unsigned codeValue = (bins >> goRicePar) - cutoff; unsigned suffixLength; - if( codeValue >= ( ( 1 << maxPrefixLength ) - 1 ) ) + if (codeValue >= ((1 << maxPrefixLength) - 1)) { prefixLength = maxPrefixLength; suffixLength = maxLog2TrDynamicRange; } else { - while( codeValue > ( ( 2 << prefixLength ) - 2 ) ) + while (codeValue > ((2 << prefixLength) - 2)) { prefixLength++; } suffixLength = prefixLength + goRicePar + 1; //+1 for the separator bit } - m_EstFracBits += BinProbModelBase::estFracBitsEP( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength ); - } - else - { - unsigned length = goRicePar; - unsigned delta = 1 << length; - bins -= threshold; - while (bins >= delta ) - { - bins -= delta; - delta = 1 << (++length); - } - m_EstFracBits += BinProbModelBase::estFracBitsEP(COEF_REMAIN_BIN_REDUCTION + 1 + (length << 1) - goRicePar); + m_EstFracBits += BinProbModelBase::estFracBitsEP(cutoff + prefixLength + suffixLength); } } @@ -488,13 +450,6 @@ void BitEstimatorBase::align() m_EstFracBits &= mask; } -void BitEstimatorBase::pcmAlignBits() -{ - uint64_t numCurrBits = ( m_EstFracBits >> SCALE_BITS ); - uint64_t filledBytes = ( numCurrBits + 8 ) >> 3; // including aligned_one_bit and aligned_zero_bits - unsigned bitsToAdd = unsigned( ( filledBytes << 3 ) - numCurrBits ); - m_EstFracBits += BinProbModelBase::estFracBitsEP( bitsToAdd ); -} diff --git a/source/Lib/EncoderLib/BinEncoder.h b/source/Lib/EncoderLib/BinEncoder.h index 
83c108c05e9a2ee58e1bf47ddd7181a573fe9ade..67500723eaac4809945a80156075f2846b8ffbb1 100644 --- a/source/Lib/EncoderLib/BinEncoder.h +++ b/source/Lib/EncoderLib/BinEncoder.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -118,12 +118,10 @@ public: virtual void encodeBinsEP ( unsigned bins, unsigned numBins ) = 0; virtual void encodeRemAbsEP ( unsigned bins, unsigned goRicePar, - bool useLimitedPrefixLength, + unsigned cutoff, int maxLog2TrDynamicRange ) = 0; virtual void encodeBinTrm ( unsigned bin ) = 0; - virtual void encodeBinsPCM ( unsigned bins, unsigned numBins ) = 0; virtual void align () = 0; - virtual void pcmAlignBits () = 0; public: virtual uint32_t getNumBins () = 0; virtual bool isEncoding () = 0; @@ -183,12 +181,10 @@ public: void encodeBinsEP ( unsigned bins, unsigned numBins ); void encodeRemAbsEP ( unsigned bins, unsigned goRicePar, - bool useLimitedPrefixLength, + unsigned cutoff, int maxLog2TrDynamicRange ); void encodeBinTrm ( unsigned bin ); - void encodeBinsPCM ( unsigned bins, unsigned numBins ); void align (); - void pcmAlignBits (); unsigned getNumWrittenBits () { return ( m_Bitstream->getNumberOfWrittenBits() + 8 * m_numBufferedBytes + 23 - m_bitsLeft ); } public: uint32_t getNumBins () { return BinCounter::getAll(); } @@ -251,11 +247,9 @@ public: void encodeBinsEP ( unsigned bins, unsigned numBins ) { m_EstFracBits += BinProbModelBase::estFracBitsEP ( numBins ); } void encodeRemAbsEP ( unsigned bins, unsigned goRicePar, - bool useLimitedPrefixLength, + unsigned cutoff, int maxLog2TrDynamicRange ); - void encodeBinsPCM ( unsigned bins, unsigned numBins ) { m_EstFracBits += BinProbModelBase::estFracBitsEP ( numBins ); } void align (); - void pcmAlignBits (); public: uint32_t getNumBins () { 
THROW("Not supported"); return 0; } bool isEncoding () { return false; } diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index cc7faea2c396aa80f40a856cd35b7d5673afb8ad..51afcfbfca8ab039ac1b9c005cba1b62b2969b1e 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -152,51 +152,62 @@ void CABACWriter::end_of_slice() //================================================================================ // clause 7.3.8.2 //-------------------------------------------------------------------------------- -// bool coding_tree_unit( cs, area, qp, ctuRsAddr, skipSao ) +// bool coding_tree_unit( cs, area, qp, ctuRsAddr, skipSao, skipAlf ) //================================================================================ -void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao /* = false */ ) +void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao /* = false */, bool skipAlf /* = false */ ) { CUCtx cuCtx( qps[CH_L] ); - Partitioner *partitioner = PartitionerFactory::get( *cs.slice ); + QTBTPartitioner partitioner; - partitioner->initCtu( area, CH_L, *cs.slice ); + partitioner.initCtu(area, CH_L, *cs.slice); if( !skipSao ) { sao( *cs.slice, ctuRsAddr ); } - for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) + if (!skipAlf) { - codeAlfCtuEnableFlag( cs, ctuRsAddr, compIdx ); + for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++) + { + codeAlfCtuEnableFlag(cs, ctuRsAddr, compIdx, NULL); + if (isLuma(ComponentID(compIdx))) + { + 
codeAlfCtuFilterIndex(cs, ctuRsAddr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)); + } + if (isChroma(ComponentID(compIdx))) + { + uint8_t* ctbAlfFlag = cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx) ? cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ) : nullptr; + if( ctbAlfFlag && ctbAlfFlag[ctuRsAddr] ) + { + codeAlfCtuAlternative( cs, ctuRsAddr, compIdx ); + } + } + } } if ( CS::isDualITree(cs) && cs.pcv->chrFormat != CHROMA_400 && cs.pcv->maxCUWidth > 64 ) { CUCtx chromaCuCtx(qps[CH_C]); - Partitioner *chromaPartitioner = PartitionerFactory::get(*cs.slice); - chromaPartitioner->initCtu(area, CH_C, *cs.slice); - coding_tree(cs, *partitioner, cuCtx, chromaPartitioner, &chromaCuCtx); + QTBTPartitioner chromaPartitioner; + chromaPartitioner.initCtu(area, CH_C, *cs.slice); + coding_tree(cs, partitioner, cuCtx, &chromaPartitioner, &chromaCuCtx); qps[CH_L] = cuCtx.qp; qps[CH_C] = chromaCuCtx.qp; - - delete chromaPartitioner; } else { - coding_tree( cs, *partitioner, cuCtx ); + coding_tree(cs, partitioner, cuCtx); qps[CH_L] = cuCtx.qp; if( CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 ) { CUCtx cuCtxChroma( qps[CH_C] ); - partitioner->initCtu( area, CH_C, *cs.slice ); - coding_tree( cs, *partitioner, cuCtxChroma ); + partitioner.initCtu(area, CH_C, *cs.slice); + coding_tree(cs, partitioner, cuCtxChroma); qps[CH_C] = cuCtxChroma.qp; } } - - delete partitioner; } @@ -235,14 +246,9 @@ void CABACWriter::sao( const Slice& slice, unsigned ctuRsAddr ) int rx = ctuRsAddr - ry * frame_width_in_ctus; const Position pos ( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight ); const unsigned curSliceIdx = slice.getIndependentSliceIdx(); -#if HEVC_TILES_WPP - const unsigned curTileIdx = cs.picture->tileMap->getTileIdxMap( pos ); - bool leftMergeAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, curTileIdx, CH_L ) ? 
true : false; - bool aboveMergeAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false; -#else - bool leftMergeAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, CH_L ) ? true : false; - bool aboveMergeAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false; -#endif + const unsigned curTileIdx = cs.pps->getTileIdx( pos ); + bool leftMergeAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; + bool aboveMergeAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; sao_block_pars( sao_ctu_pars, sps.getBitDepths(), sliceEnabled, leftMergeAvail, aboveMergeAvail, false ); } @@ -374,7 +380,8 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione const CodingUnit &cu = *cs.getCU( currArea.blocks[partitioner.chType], partitioner.chType ); // Reset delta QP coding flag and ChromaQPAdjustemt coding flag - if( pps.getUseDQP() && partitioner.currQgEnable() ) + //Note: do not reset qg at chroma CU + if( pps.getUseDQP() && partitioner.currQgEnable() && !isChroma( partitioner.chType ) ) { cuCtx.qgStart = true; cuCtx.isDQPCoded = false; @@ -449,6 +456,17 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione } else { + const ModeType modeTypeParent = partitioner.modeType; + const ModeType modeTypeChild = CU::getModeTypeAtDepth( cu, partitioner.currDepth ); + mode_constraint( splitMode, cs, partitioner, modeTypeChild ); + partitioner.modeType = modeTypeChild; + + bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false; + CHECK( chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" ); + if( partitioner.treeType == TREE_D ) + { + partitioner.treeType = chromaNotSplit ? 
TREE_L : TREE_D; + } partitioner.splitCurrArea( splitMode, cs ); do @@ -460,6 +478,22 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione } while( partitioner.nextPart( cs ) ); partitioner.exitCurrSplit(); + if( chromaNotSplit ) + { + CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status" ); + partitioner.chType = CHANNEL_TYPE_CHROMA; + partitioner.treeType = TREE_C; + + if( cs.picture->blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) ) + { + coding_tree( cs, partitioner, cuCtx ); + } + + //recover + partitioner.chType = CHANNEL_TYPE_LUMA; + partitioner.treeType = TREE_D; + } + partitioner.modeType = modeTypeParent; } return; } @@ -470,15 +504,45 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione cuCtx.qgStart = false; cuCtx.qp = CU::predictQP( cu, cuCtx.qp ); } + CHECK( cu.treeType != partitioner.treeType, "treeType mismatch" ); // coding unit coding_unit( cu, partitioner, cuCtx ); + if( cu.chType == CHANNEL_TYPE_CHROMA ) + { + DTRACE_COND( (isEncoding()), g_trace_ctx, D_QP, "[chroma CU]x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Cb().x, cu.Cb().y, cu.Cb().width, cu.Cb().height, cu.qp ); + } + else + { DTRACE_COND( ( isEncoding() ), g_trace_ctx, D_QP, "x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, cu.qp ); + } DTRACE_BLOCK_REC_COND( ( !isEncoding() ), cs.picture->getRecoBuf( cu ), cu, cu.predMode ); } +void CABACWriter::mode_constraint( const PartSplit split, const CodingStructure& cs, Partitioner& partitioner, const ModeType modeType ) +{ + CHECK( split == CU_DONT_SPLIT, "splitMode shall not be no split" ); + int val = cs.signalModeCons( split, partitioner, partitioner.modeType ); + if( val == LDT_MODE_TYPE_SIGNAL ) + { + CHECK( modeType == MODE_TYPE_ALL, "shall not be no constraint case" ); + bool flag = modeType == MODE_TYPE_INTRA; + int ctxIdx = DeriveCtx::CtxModeConsFlag( cs, partitioner ); + 
m_BinEncoder.encodeBin( flag, Ctx::ModeConsFlag( ctxIdx ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "mode_cons_flag() flag=%d\n", flag ); + } + else if( val == LDT_MODE_TYPE_INFER ) + { + assert( modeType == MODE_TYPE_INTRA ); + } + else + { + assert( modeType == partitioner.modeType ); + } +} + void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& cs, Partitioner& partitioner ) { bool canNo, canQt, canBh, canBv, canTh, canTv; @@ -544,12 +608,9 @@ void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& c // clause 7.3.8.5 //-------------------------------------------------------------------------------- // void coding_unit ( cu, partitioner, cuCtx ) -// void cu_transquant_bypass_flag ( cu ) // void cu_skip_flag ( cu ) // void pred_mode ( cu ) // void part_mode ( cu ) -// void pcm_flag ( cu ) -// void pcm_samples ( tu ) // void cu_pred_data ( pus ) // void cu_lic_flag ( cu ) // void intra_luma_pred_modes ( pus ) @@ -561,13 +622,8 @@ void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& c void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx ) { + DTRACE( g_trace_ctx, D_SYNTAX, "coding_unit() treeType=%d modeType=%d\n", cu.treeType, cu.modeType ); CodingStructure& cs = *cu.cs; - cs.chType = partitioner.chType; - // transquant bypass flag - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - cu_transquant_bypass_flag( cu ); - } // skip flag if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) && cu.Y().valid()) @@ -580,43 +636,44 @@ void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, C if( cu.skip ) { CHECK( !cu.firstPU->mergeFlag, "Merge flag has to be on!" 
); + CHECK(cu.colorTransform, "ACT should not be enabled for skip mode"); PredictionUnit& pu = *cu.firstPU; prediction_unit ( pu ); end_of_ctu ( cu, cuCtx ); return; } -#if !FIX_PCM - // pcm samples - if( CU::isIntra(cu) ) - { - pcm_data( cu, partitioner ); - if( cu.ipcm ) - { - end_of_ctu( cu, cuCtx ); - return; - } - } -#endif // prediction mode and partitioning data pred_mode ( cu ); - -#if FIX_PCM - // pcm samples - if( CU::isIntra(cu) ) + if (CU::isIntra(cu)) + { + adaptive_color_transform(cu); + } + if (CU::isPLT(cu)) { - pcm_data( cu, partitioner ); - if( cu.ipcm ) + CHECK(cu.colorTransform, "ACT should not be enabled for PLT mode"); + if (cu.isSepTree()) { - end_of_ctu( cu, cuCtx ); - return; + if (isLuma(partitioner.chType)) + { + cu_palette_info(cu, COMPONENT_Y, 1, cuCtx); + } + if (cu.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA)) + { + cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx); + } } + else + { + cu_palette_info(cu, COMPONENT_Y, 3, cuCtx); + } + end_of_ctu(cu, cuCtx); + return; } -#endif - extend_ref_line(cu); - - isp_mode( cu ); + bdpcm_mode( cu, ComponentID( partitioner.chType ) ); + if (!CS::isDualITree(cs) && isLuma(partitioner.chType)) + bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA)); // prediction data ( intra prediction modes / reference indexes + motion vectors ) cu_pred_data( cu ); @@ -629,97 +686,132 @@ void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, C } -void CABACWriter::cu_transquant_bypass_flag( const CodingUnit& cu ) -{ - m_BinEncoder.encodeBin( (cu.transQuantBypass), Ctx::TransquantBypassFlag() ); -} - - void CABACWriter::cu_skip_flag( const CodingUnit& cu ) { unsigned ctxId = DeriveCtx::CtxSkipFlag( cu ); - if (cu.slice->isIntra() && cu.cs->slice->getSPS()->getIBCFlag()) + if ((cu.slice->isIntra() || cu.isConsIntra()) && cu.cs->slice->getSPS()->getIBCFlag()) { + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { 
m_BinEncoder.encodeBin((cu.skip), Ctx::SkipFlag(ctxId)); DTRACE(g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, cu.skip ? 1 : 0); + } + return; + } + if ( !cu.cs->slice->getSPS()->getIBCFlag() && cu.lwidth() == 4 && cu.lheight() == 4 ) + { + return; + } + if( !cu.cs->slice->getSPS()->getIBCFlag() && cu.isConsIntra() ) + { return; } - m_BinEncoder.encodeBin( ( cu.skip ), Ctx::SkipFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, cu.skip ? 1 : 0 ); if (cu.skip && cu.cs->slice->getSPS()->getIBCFlag()) { + if (cu.lwidth() < 128 && cu.lheight() < 128 && !cu.isConsInter()) // disable IBC mode larger than 64x64 and disable IBC when only allowing inter mode + { + if ( cu.lwidth() == 4 && cu.lheight() == 4 ) + { + return; + } unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); m_BinEncoder.encodeBin(CU::isIBC(cu) ? 1 : 0, Ctx::IBCFlag(ctxidx)); DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode); -#if !JVET_MMVD_OFF_MACRO - if (CU::isInter(cu)) - { - m_BinEncoder.encodeBin(cu.mmvdSkip, Ctx::MmvdFlag(0)); - DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, cu.mmvdSkip ? 1 : 0); } -#endif - } -#if !JVET_MMVD_OFF_MACRO - if (cu.skip && !cu.cs->slice->getSPS()->getIBCFlag()) - { - m_BinEncoder.encodeBin(cu.mmvdSkip, Ctx::MmvdFlag(0)); - DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, cu.mmvdSkip ? 
1 : 0); } -#endif } void CABACWriter::pred_mode( const CodingUnit& cu ) { - if (cu.cs->slice->getSPS()->getIBCFlag()) + if (cu.cs->slice->getSPS()->getIBCFlag() && cu.chType != CHANNEL_TYPE_CHROMA) { - if (cu.cs->slice->isIntra()) + if( cu.isConsInter() ) + { + assert( CU::isInter( cu ) ); + return; + } + + if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() ) { + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx)); + } + if (!CU::isIBC(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + m_BinEncoder.encodeBin(CU::isPLT(cu), Ctx::PLTFlag(0)); + } } else { - m_BinEncoder.encodeBin((CU::isIntra(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))); - if (!CU::isIntra(cu)) + if( cu.isConsInter() ) + { + return; + } + m_BinEncoder.encodeBin((CU::isIntra(cu) || CU::isPLT(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))); + if (CU::isIntra(cu) || CU::isPLT(cu)) { + if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + m_BinEncoder.encodeBin(CU::isPLT(cu), Ctx::PLTFlag(0)); + } + else + { + if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64 + { unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu); m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx)); + } } } } else { - if (cu.cs->slice->isIntra()) + if( cu.isConsInter() ) + { + assert( CU::isInter( cu ) ); + return; + } + + if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() ) { + if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + m_BinEncoder.encodeBin((CU::isPLT(cu)), Ctx::PLTFlag(0)); return; } - m_BinEncoder.encodeBin((CU::isIntra(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))); + m_BinEncoder.encodeBin((CU::isIntra(cu) || CU::isPLT(cu)), 
Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))); + if ((CU::isIntra(cu) || CU::isPLT(cu)) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64) + { + m_BinEncoder.encodeBin((CU::isPLT(cu)), Ctx::PLTFlag(0)); + } } } - -void CABACWriter::pcm_data( const CodingUnit& cu, Partitioner& partitioner ) +void CABACWriter::bdpcm_mode( const CodingUnit& cu, const ComponentID compID ) { - pcm_flag( cu, partitioner ); - if( cu.ipcm ) + if( cu.cs->sps->getBDPCMEnabled() == 0 ) return; + if( !CU::bdpcmAllowed( cu, compID ) ) return; + + int bdpcmMode = isLuma(compID) ? cu.bdpcmMode : cu.bdpcmModeChroma; + + m_BinEncoder.encodeBin(bdpcmMode > 0 ? 1 : 0, Ctx::BDPCMMode(0)); + + if (bdpcmMode) { - m_BinEncoder.pcmAlignBits(); - pcm_samples( *cu.firstTU ); + m_BinEncoder.encodeBin(bdpcmMode > 1 ? 1 : 0, Ctx::BDPCMMode(1)); } -} - -void CABACWriter::pcm_flag( const CodingUnit& cu, Partitioner& partitioner ) -{ - const SPS& sps = *cu.cs->sps; - if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize()) - || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) ) + if (isLuma(compID)) { - return; + DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_LUMA, cu.lumaPos().x, cu.lumaPos().y, cu.lwidth(), cu.lheight(), cu.bdpcmMode); + } + else + { + DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_CHROMA, cu.chromaPos().x, cu.chromaPos().y, cu.chromaSize().width, cu.chromaSize().height, cu.bdpcmModeChroma); } - m_BinEncoder.encodeBinTrm( cu.ipcm ); } @@ -743,48 +835,44 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu ) imv_mode ( cu ); affine_amvr_mode( cu ); - cu_gbi_flag( cu ); + cu_bcw_flag( cu ); } -void CABACWriter::cu_gbi_flag(const CodingUnit& cu) +void 
CABACWriter::cu_bcw_flag(const CodingUnit& cu) { - if(!CU::isGBiIdxCoded(cu)) + if(!CU::isBcwIdxCoded(cu)) { return; } - CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) "); - const uint8_t gbiCodingIdx = (uint8_t)g_GbiCodingOrder[CU::getValidGbiIdx(cu)]; - - const int32_t numGBi = (cu.slice->getCheckLDC()) ? 5 : 3; - - m_BinEncoder.encodeBin((gbiCodingIdx == 0 ? 1 : 0), Ctx::GBiIdx(0)); + CHECK(!(BCW_NUM > 1 && (BCW_NUM == 2 || (BCW_NUM & 0x01) == 1)), " !( BCW_NUM > 1 && ( BCW_NUM == 2 || ( BCW_NUM & 0x01 ) == 1 ) ) "); + const uint8_t bcwCodingIdx = (uint8_t)g_BcwCodingOrder[CU::getValidBcwIdx(cu)]; - if(numGBi > 2 && gbiCodingIdx != 0) + const int32_t numBcw = (cu.slice->getCheckLDC()) ? 5 : 3; + m_BinEncoder.encodeBin((bcwCodingIdx == 0 ? 0 : 1), Ctx::BcwIdx(0)); + if(numBcw > 2 && bcwCodingIdx != 0) { - const uint32_t prefixNumBits = numGBi - 2; + const uint32_t prefixNumBits = numBcw - 2; const uint32_t step = 1; - int ctxIdGBi = 4; uint8_t idx = 1; for(int ui = 0; ui < prefixNumBits; ++ui) { - if (gbiCodingIdx == idx) + if (bcwCodingIdx == idx) { - m_BinEncoder.encodeBin(1, Ctx::GBiIdx(ctxIdGBi)); + m_BinEncoder.encodeBinEP(0); break; } else { - m_BinEncoder.encodeBin(0, Ctx::GBiIdx(ctxIdGBi)); - ctxIdGBi += step; + m_BinEncoder.encodeBinEP(1); idx += step; } } } - DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 1 : 0); + DTRACE(g_trace_ctx, D_SYNTAX, "cu_bcw_flag() bcw_idx=%d\n", cu.BcwIdx ? 
1 : 0); } void CABACWriter::xWriteTruncBinCode(uint32_t symbol, uint32_t maxSymbol) @@ -827,12 +915,13 @@ void CABACWriter::xWriteTruncBinCode(uint32_t symbol, uint32_t maxSymbol) void CABACWriter::extend_ref_line(const PredictionUnit& pu) { -#if !ENABLE_JVET_L0283_MRL - return; -#endif const CodingUnit& cu = *pu.cu; - if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) + if( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma( cu.chType ) || cu.bdpcmMode ) + { + return; + } + if( !cu.cs->sps->getUseMRL() ) { return; } @@ -848,21 +937,18 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu) if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1)); - if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1]) - { - m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[2], Ctx::MultiRefLineIdx(2)); - } } } } void CABACWriter::extend_ref_line(const CodingUnit& cu) { -#if !ENABLE_JVET_L0283_MRL - return; -#endif - if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.ipcm) + if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode ) + { + return; + } + if( !cu.cs->sps->getUseMRL() ) { return; } @@ -884,10 +970,6 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu) if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0]) { m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1)); - if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1]) - { - m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[2], Ctx::MultiRefLineIdx(2)); - } } } @@ -902,6 +984,22 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) return; } + if( cu.bdpcmMode ) + { + cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? 
VER_IDX : HOR_IDX; + return; + } + + mip_flag(cu); + if (cu.mipFlag) + { + mip_pred_modes(cu); + return; + } + extend_ref_line( cu ); + + isp_mode( cu ); + const int numMPMs = NUM_MOST_PROBABLE_MODES; const int numBlocks = CU::getNumPUs( cu ); unsigned mpm_preds [4][numMPMs]; @@ -929,7 +1027,7 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) break; } } - if( pu->multiRefIdx || ( cu.ispMode && isLuma( cu.chType ) ) ) + if ( pu->multiRefIdx ) { CHECK(mpm_idx >= numMPMs, "use of non-MPM"); } @@ -950,7 +1048,9 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) if( mpm_idx < numMPMs ) { { - m_BinEncoder.encodeBinEP( mpm_idx > 0 ); + unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0); + if (pu->multiRefIdx == 0) + m_BinEncoder.encodeBin(mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx)); if( mpm_idx ) { m_BinEncoder.encodeBinEP( mpm_idx > 1 ); @@ -999,6 +1099,16 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) { + if( pu.cu->bdpcmMode ) return; + mip_flag(*pu.cu); + if (pu.cu->mipFlag) + { + mip_pred_mode(pu); + return; + } + extend_ref_line( pu ); + isp_mode( *pu.cu ); + // prev_intra_luma_pred_flag const int numMPMs = NUM_MOST_PROBABLE_MODES; unsigned mpm_pred[numMPMs]; @@ -1016,7 +1126,7 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) break; } } - if( pu.multiRefIdx || ( pu.cu->ispMode && isLuma( pu.cu->chType ) ) ) + if ( pu.multiRefIdx ) { CHECK(mpm_idx >= numMPMs, "use of non-MPM"); } @@ -1029,7 +1139,9 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) if( mpm_idx < numMPMs ) { { - m_BinEncoder.encodeBinEP( mpm_idx > 0 ); + unsigned ctx = (pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 
1 : 0); + if (pu.multiRefIdx == 0) + m_BinEncoder.encodeBin( mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx) ); if( mpm_idx ) { m_BinEncoder.encodeBinEP( mpm_idx > 1 ); @@ -1067,7 +1179,7 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) void CABACWriter::intra_chroma_pred_modes( const CodingUnit& cu ) { - if( cu.chromaFormat == CHROMA_400 || ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_LUMA ) ) + if( cu.chromaFormat == CHROMA_400 || ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA ) ) { return; } @@ -1076,70 +1188,83 @@ void CABACWriter::intra_chroma_pred_modes( const CodingUnit& cu ) intra_chroma_pred_mode( *pu ); } - -void CABACWriter::intra_chroma_lmc_mode( const PredictionUnit& pu ) +void CABACWriter::intra_chroma_lmc_mode(const PredictionUnit& pu) { const unsigned intraDir = pu.intraDir[1]; - int lmModeList[10]; - int maxSymbol = PU::getLMSymbolList( pu, lmModeList ); - int symbol = -1; - for ( int k = 0; k < LM_SYMBOL_NUM; k++ ) + int lmModeList[10]; + PU::getLMSymbolList(pu, lmModeList); + int symbol = -1; + for (int k = 0; k < LM_SYMBOL_NUM; k++) + { + if (lmModeList[k] == intraDir) { - if ( lmModeList[k] == intraDir || ( lmModeList[k] == -1 && intraDir < LM_CHROMA_IDX ) ) - { - symbol = k; - break; - } + symbol = k; + break; } - CHECK( symbol < 0, "invalid symbol found" ); + } + CHECK(symbol < 0, "invalid symbol found"); + + m_BinEncoder.encodeBin(symbol == 0 ? 0 : 1, Ctx::CclmModeIdx(0)); - unary_max_symbol(symbol, Ctx::IntraChromaPredMode(1), Ctx::IntraChromaPredMode(2), maxSymbol - 1); + if (symbol > 0) + { + CHECK(symbol > 2, "invalid symbol for MMLM"); + unsigned int symbol_minus_1 = symbol - 1; + m_BinEncoder.encodeBinEP(symbol_minus_1); + } } -void CABACWriter::intra_chroma_pred_mode( const PredictionUnit& pu ) +void CABACWriter::intra_chroma_pred_mode(const PredictionUnit& pu) { - const unsigned intraDir = pu.intraDir[1]; - const bool isDerivedMode = intraDir == DM_CHROMA_IDX; - - m_BinEncoder.encodeBin(isDerivedMode ? 
0 : 1, Ctx::IntraChromaPredMode(0)); + if (pu.cu->bdpcmModeChroma) + { + return; + } - if (isDerivedMode) + const unsigned intraDir = pu.intraDir[1]; + if (pu.cu->colorTransform) { + CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM for adaptive color transform"); return; } - - // LM chroma mode - if( pu.cs->sps->getUseLMChroma() ) + if (pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed()) { - intra_chroma_lmc_mode( pu ); - if ( PU::isLMCMode( intraDir ) ) + m_BinEncoder.encodeBin(PU::isLMCMode(intraDir) ? 1 : 0, Ctx::CclmModeFlag(0)); + if (PU::isLMCMode(intraDir)) { + intra_chroma_lmc_mode(pu); return; } } + const bool isDerivedMode = intraDir == DM_CHROMA_IDX; + m_BinEncoder.encodeBin(isDerivedMode ? 0 : 1, Ctx::IntraChromaPredMode(0)); + if (isDerivedMode) + { + return; + } + // chroma candidate index - unsigned chromaCandModes[ NUM_CHROMA_MODE ]; - PU::getIntraChromaCandModes( pu, chromaCandModes ); + unsigned chromaCandModes[NUM_CHROMA_MODE]; + PU::getIntraChromaCandModes(pu, chromaCandModes); int candId = 0; - for ( ; candId < NUM_CHROMA_MODE; candId++ ) + for (; candId < NUM_CHROMA_MODE; candId++) { - if( intraDir == chromaCandModes[ candId ] ) + if (intraDir == chromaCandModes[candId]) { break; } } - CHECK( candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds" ); - CHECK( chromaCandModes[ candId ] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path" ); + CHECK(candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds"); + CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path"); { - m_BinEncoder.encodeBinsEP( candId, 2 ); + m_BinEncoder.encodeBinsEP(candId, 2); } } - void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx ) { if (!CU::isIntra(cu)) @@ -1156,21 +1281,33 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C if( !cu.rootCbf ) { + CHECK(cu.colorTransform, 
"ACT should not be enabled for root_cbf = 0"); return; } } + if (CU::isInter(cu) || CU::isIBC(cu)) + { + adaptive_color_transform(cu); + } + + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false; + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; + cuCtx.lfnstLastScanPos = false; + cuCtx.violatesMtsCoeffConstraint = false; - ChromaCbfs chromaCbfs; if( cu.ispMode && isLuma( partitioner.chType ) ) { TUIntraSubPartitioner subTuPartitioner( partitioner ); - transform_tree( *cu.cs, subTuPartitioner, cuCtx, chromaCbfs, CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType ) ), 0 ); + transform_tree( *cu.cs, subTuPartitioner, cuCtx, CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType) ), 0 ); } else { - transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs ); + transform_tree( *cu.cs, partitioner, cuCtx ); } + + residual_lfnst_mode( cu, cuCtx ); + mts_idx ( cu, &cuCtx ); } void CABACWriter::rqt_root_cbf( const CodingUnit& cu ) @@ -1180,6 +1317,25 @@ void CABACWriter::rqt_root_cbf( const CodingUnit& cu ) DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 
1 : 0, cu.lumaPos().x, cu.lumaPos().y ); } +void CABACWriter::adaptive_color_transform(const CodingUnit& cu) +{ + if (!cu.slice->getSPS()->getUseColorTrans()) + { + return; + } + + if (cu.isSepTree()) + { + CHECK(cu.colorTransform, "adaptive color transform should be disabled when dualtree and localtree are enabled"); + return; + } + + if (CU::isInter(cu) || CU::isIBC(cu) || CU::isIntra(cu)) + { + m_BinEncoder.encodeBin(cu.colorTransform, Ctx::ACTFlag()); + } +} + void CABACWriter::sbt_mode( const CodingUnit& cu ) { uint8_t sbtAllowed = cu.checkAllowedSbt(); @@ -1239,112 +1395,368 @@ void CABACWriter::sbt_mode( const CodingUnit& cu ) void CABACWriter::end_of_ctu( const CodingUnit& cu, CUCtx& cuCtx ) { - const Slice* slice = cu.cs->slice; -#if HEVC_TILES_WPP - const TileMap& tileMap = *cu.cs->picture->tileMap; - const int currentCTUTsAddr = tileMap.getCtuRsToTsAddrMap( CU::getCtuAddr( cu ) ); -#else - const int currentCTUTsAddr = CU::getCtuAddr( cu ); -#endif const bool isLastSubCUOfCtu = CU::isLastSubCUOfCtu( cu ); if ( isLastSubCUOfCtu - && ( !CS::isDualITree( *cu.cs ) || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) ) + && ( !cu.isSepTree() || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) ) ) { cuCtx.isDQPCoded = ( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded ); - // The 1-terminating bit is added to all streams, so don't add it here when it's 1. - // i.e. when the slice segment CurEnd CTU address is the current CTU address+1. 
-#if HEVC_DEPENDENT_SLICES - if( slice->getSliceSegmentCurEndCtuTsAddr() != currentCTUTsAddr + 1 ) -#else - if(slice->getSliceCurEndCtuTsAddr() != currentCTUTsAddr + 1) -#endif - { - m_BinEncoder.encodeBinTrm( 0 ); - } } } - - - - -//================================================================================ -// clause 7.3.8.6 -//-------------------------------------------------------------------------------- -// void prediction_unit ( pu ); -// void merge_flag ( pu ); -// void merge_idx ( pu ); -// void inter_pred_idc ( pu ); -// void ref_idx ( pu, refList ); -// void mvp_flag ( pu, refList ); -//================================================================================ - -void CABACWriter::prediction_unit( const PredictionUnit& pu ) +void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx) { -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM - CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" ); - CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" ); + const SPS& sps = *(cu.cs->sps); + TransformUnit& tu = *cu.firstTU; + uint32_t indexMaxSize = cu.useEscape[compBegin] ? 
(cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; -#endif - if( pu.cu->skip ) + if (cu.lastPLTSize[compBegin]) { - CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" ); + xEncodePLTPredIndicator(cu, MAXPLTSIZE, compBegin); } - else + + uint32_t reusedPLTnum = 0; + for (int idx = 0; idx < cu.lastPLTSize[compBegin]; idx++) { - merge_flag( pu ); + if (cu.reuseflag[compBegin][idx]) + reusedPLTnum++; } - if( pu.mergeFlag ) + + if (reusedPLTnum < MAXPLTSIZE) { - if (CU::isIBC(*pu.cu)) + exp_golomb_eqprob(cu.curPLTSize[compBegin] - reusedPLTnum, 0); + } + + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + for (int idx = cu.reusePLTSize[compBegin]; idx < cu.curPLTSize[compBegin]; idx++) { - merge_idx(pu); - return; + ComponentID compID = (ComponentID)comp; + const int channelBitDepth = sps.getBitDepth(toChannelType(compID)); + m_BinEncoder.encodeBinsEP(cu.curPLT[comp][idx], channelBitDepth); } - subblock_merge_flag( *pu.cu ); - MHIntra_flag( pu ); - if ( pu.mhIntraFlag ) + } + uint32_t signalEscape = (cu.useEscape[compBegin]) ? 1 : 0; + if (cu.curPLTSize[compBegin] > 0) + { + m_BinEncoder.encodeBinEP(signalEscape); + } + //encode index map + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; + + m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? 
SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)]; + uint32_t total = height * width; + if (indexMaxSize > 1) + codeScanRotationModeFlag(cu, compBegin); + else + assert(!cu.useRotation[compBegin]); + + if (cu.useEscape[compBegin] && cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded) + { + if (!cu.isSepTree() || isLuma(tu.chType)) { - MHIntra_luma_pred_modes( *pu.cu ); + cu_qp_delta(cu, cuCtx.qp, cu.qp); + cuCtx.qp = cu.qp; + cuCtx.isDQPCoded = true; } - triangle_mode( *pu.cu ); - if (pu.mmvdMergeFlag) + } + if (cu.useEscape[compBegin] && cu.cs->slice->getUseChromaQpAdj() && !cuCtx.isChromaQpAdjCoded) + { + if (!CS::isDualITree(*tu.cs) || isChroma(tu.chType)) { - mmvd_merge_idx(pu); + cu_chroma_qp_offset(cu); + cuCtx.isChromaQpAdjCoded = true; } - else - merge_idx ( pu ); } - else if (CU::isIBC(*pu.cu)) + + uint32_t prevRunPos = 0; + unsigned prevRunType = 0; + for (int subSetId = 0; subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE; subSetId++) { - ref_idx(pu, REF_PIC_LIST_0); - mvd_coding(pu.mvd[REF_PIC_LIST_0], pu.cu->imv); - mvp_flag(pu, REF_PIC_LIST_0); + cuPaletteSubblockInfo(cu, compBegin, numComp, subSetId, prevRunPos, prevRunType); } - else +} +void CABACWriter::cuPaletteSubblockInfo(const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType) +{ + const SPS& sps = *(cu.cs->sps); + TransformUnit& tu = *cu.firstTU; + PLTtypeBuf runType = tu.getrunType(compBegin); + PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin); + uint32_t indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; + uint32_t totalPel = cu.block(compBegin).height*cu.block(compBegin).width; + + int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE; + int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE); + maxSubPos = (maxSubPos > totalPel) ? 
totalPel : maxSubPos; // if last position is out of the current CU size + + unsigned runCopyFlag[(1 << LOG2_PALETTE_CG_SIZE)]; + for (int i = 0; i < (1 << LOG2_PALETTE_CG_SIZE); i++) + runCopyFlag[i] = MAX_INT; + + if (minSubPos == 0) + runCopyFlag[0] = 0; + +// PLT runCopy flag and runType - context coded + int curPos = minSubPos; + for (; curPos < maxSubPos && indexMaxSize > 1; curPos++) { - int8_t affineMvdShift = pu.cu->imv ? ( pu.cu->imv == 1 ? -1 : 1 ) : 0; - inter_pred_idc( pu ); - affine_flag ( *pu.cu ); - smvd_mode( pu ); - if( pu.interDir != 2 /* PRED_L1 */ ) + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y; + uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x; + // encode runCopyFlag + bool identityFlag = !((runType.at(posx, posy) != runType.at(posxprev, posyprev)) + || ((runType.at(posx, posy) == PLT_RUN_INDEX) && (curPLTIdx.at(posx, posy) != curPLTIdx.at(posxprev, posyprev)))); + + const CtxSet& ctxSet = (prevRunType == PLT_RUN_INDEX)? 
Ctx::IdxRunModel: Ctx::CopyRunModel; + if ( curPos > 0 ) { - ref_idx ( pu, REF_PIC_LIST_0 ); - if ( pu.cu->affine ) + int dist = curPos - prevRunPos - 1; + const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(prevRunType, dist); + runCopyFlag[curPos - minSubPos] = identityFlag; + m_BinEncoder.encodeBin( identityFlag, ctxSet( ctxId ) ); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_copy_flag() bin=%d ctx=%d\n", identityFlag, ctxId); + } + // encode run_type + if ( !identityFlag || curPos == 0 ) + { + prevRunPos = curPos; + prevRunType = runType.at(posx, posy); + if (((posy == 0) && !cu.useRotation[compBegin]) || ((posx == 0) && cu.useRotation[compBegin])) { - mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][0], affineMvdShift ); - mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][1], affineMvdShift ); - if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) + assert(runType.at(posx, posy) == PLT_RUN_INDEX); + } + else if (curPos != 0 && runType.at(posxprev, posyprev) == PLT_RUN_COPY) + { + assert(runType.at(posx, posy) == PLT_RUN_INDEX); + } + else + { + m_BinEncoder.encodeBin(runType.at(posx, posy), Ctx::RunTypeFlag()); + } + DTRACE(g_trace_ctx, D_SYNTAX, "plt_type_flag() bin=%d sp=%d\n", runType.at(posx, posy), curPos); + } + } + +// PLT index values - bypass coded + if (indexMaxSize > 1) + { + curPos = minSubPos; + for (; curPos < maxSubPos; curPos++) + { + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + if ( runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX) + { + writePLTIndex(cu, curPos, curPLTIdx, runType, indexMaxSize, compBegin); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_idx_idc() value=%d sp=%d\n", curPLTIdx.at(posx, posy), curPos); + } + } + } + +// Quantized escape colors - bypass coded + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc()); + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + ComponentID compID = 
(ComponentID)comp; + for (curPos = minSubPos; curPos < maxSubPos; curPos++) + { + uint32_t posy = m_scanOrder[curPos].y; + uint32_t posx = m_scanOrder[curPos].x; + if (curPLTIdx.at(posx, posy) == cu.curPLTSize[compBegin]) + { + PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)comp); + if (compID == COMPONENT_Y || compBegin != COMPONENT_Y) + { + exp_golomb_eqprob((unsigned)escapeValue.at(posx, posy), 3); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos); + } + if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0) + { + uint32_t posxC = posx >> scaleX; + uint32_t posyC = posy >> scaleY; + exp_golomb_eqprob((unsigned)escapeValue.at(posxC, posyC), 3); + DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos); + } + } + } + } +} +void CABACWriter::codeScanRotationModeFlag(const CodingUnit& cu, ComponentID compBegin) +{ + m_BinEncoder.encodeBin((cu.useRotation[compBegin]), Ctx::RotationFlag()); +} +void CABACWriter::xEncodePLTPredIndicator(const CodingUnit& cu, uint32_t maxPLTSize, ComponentID compBegin) +{ + int lastPredIdx = -1; + uint32_t run = 0; + uint32_t numPLTPredicted = 0; + for (uint32_t idx = 0; idx < cu.lastPLTSize[compBegin]; idx++) + { + if (cu.reuseflag[compBegin][idx]) + { + numPLTPredicted++; + lastPredIdx = idx; + } + } + + int idx = 0; + while (idx <= lastPredIdx) + { + if (cu.reuseflag[compBegin][idx]) + { + exp_golomb_eqprob(run ? 
run + 1 : run, 0); + run = 0; + } + else + { + run++; + } + idx++; + } + if ((numPLTPredicted < maxPLTSize && lastPredIdx + 1 < cu.lastPLTSize[compBegin]) || !numPLTPredicted) + { + exp_golomb_eqprob(1, 0); + } +} +Pel CABACWriter::writePLTIndex(const CodingUnit& cu, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin) +{ + uint32_t posy = m_scanOrder[idx].y; + uint32_t posx = m_scanOrder[idx].x; + Pel curLevel = (paletteIdx.at(posx, posy) == cu.curPLTSize[compBegin]) ? (maxSymbol - 1) : paletteIdx.at(posx, posy); + if (idx) // R0348: remove index redundancy + { + uint32_t prevposy = m_scanOrder[idx - 1].y; + uint32_t prevposx = m_scanOrder[idx - 1].x; + if (paletteRunType.at(prevposx, prevposy) == PLT_RUN_INDEX) + { + Pel leftLevel = paletteIdx.at(prevposx, prevposy); // left index + if (leftLevel == cu.curPLTSize[compBegin]) // escape mode + { + leftLevel = maxSymbol - 1; + } + assert(leftLevel != curLevel); + if (curLevel > leftLevel) + { + curLevel--; + } + } + else + { + Pel aboveLevel; + if (cu.useRotation[compBegin]) + { + assert(prevposx > 0); + aboveLevel = paletteIdx.at(posx - 1, posy); + if (paletteIdx.at(posx - 1, posy) == cu.curPLTSize[compBegin]) // escape mode + { + aboveLevel = maxSymbol - 1; + } + } + else + { + assert(prevposy > 0); + aboveLevel = paletteIdx.at(posx, posy - 1); + if (paletteIdx.at(posx, posy - 1) == cu.curPLTSize[compBegin]) // escape mode + { + aboveLevel = maxSymbol - 1; + } + } + assert(curLevel != aboveLevel); + if (curLevel > aboveLevel) + { + curLevel--; + } + } + maxSymbol--; + } + assert(maxSymbol > 0); + assert(curLevel >= 0); + assert(maxSymbol > curLevel); + if (maxSymbol > 1) + { + xWriteTruncBinCode(curLevel, maxSymbol); + } + return curLevel; +} + + +//================================================================================ +// clause 7.3.8.6 +//-------------------------------------------------------------------------------- +// void prediction_unit ( pu ); +// 
void merge_flag ( pu ); +// void merge_idx ( pu ); +// void inter_pred_idc ( pu ); +// void ref_idx ( pu, refList ); +// void mvp_flag ( pu, refList ); +//================================================================================ + +void CABACWriter::prediction_unit( const PredictionUnit& pu ) +{ + CHECK( pu.cu->treeType == TREE_C, "cannot be chroma CU" ); +#if ENABLE_SPLIT_PARALLELISM + CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" ); + CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" ); + +#endif + if( pu.cu->skip ) + { + CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" ); + } + else + { + merge_flag( pu ); + } + if( pu.mergeFlag ) + { + merge_data(pu); + } + else if (CU::isIBC(*pu.cu)) + { + ref_idx(pu, REF_PIC_LIST_0); + Mv mvd = pu.mvd[REF_PIC_LIST_0]; + mvd.changeIbcPrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision + if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 ) + { + CHECK( pu.mvpIdx[REF_PIC_LIST_0], "mvpIdx for IBC mode should be 0" ); + } + else + mvp_flag(pu, REF_PIC_LIST_0); + } + else + { + inter_pred_idc( pu ); + affine_flag ( *pu.cu ); + smvd_mode( pu ); + if( pu.interDir != 2 /* PRED_L1 */ ) + { + ref_idx ( pu, REF_PIC_LIST_0 ); + if ( pu.cu->affine ) + { + Mv mvd = pu.mvdAffi[REF_PIC_LIST_0][0]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision + mvd = pu.mvdAffi[REF_PIC_LIST_0][1]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision + if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][2], affineMvdShift ); + mvd = pu.mvdAffi[REF_PIC_LIST_0][2]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision } } else { - mvd_coding( pu.mvd[REF_PIC_LIST_0], pu.cu->imv ); + Mv mvd = pu.mvd[REF_PIC_LIST_0]; + 
mvd.changeTransPrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision } mvp_flag ( pu, REF_PIC_LIST_0 ); } @@ -1353,20 +1765,28 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu ) if ( pu.cu->smvdMode != 1 ) { ref_idx ( pu, REF_PIC_LIST_1 ); - if( !pu.cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) + if( !pu.cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ ) { if ( pu.cu->affine ) { - mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][0], affineMvdShift ); - mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][1], affineMvdShift ); + Mv mvd = pu.mvdAffi[REF_PIC_LIST_1][0]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision + mvd = pu.mvdAffi[REF_PIC_LIST_1][1]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][2], affineMvdShift ); + mvd = pu.mvdAffi[REF_PIC_LIST_1][2]; + mvd.changeAffinePrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision } } else { - mvd_coding( pu.mvd[REF_PIC_LIST_1], pu.cu->imv ); + Mv mvd = pu.mvd[REF_PIC_LIST_1]; + mvd.changeTransPrecInternal2Amvr(pu.cu->imv); + mvd_coding(mvd, 0); // already changed to signaling precision } } } @@ -1394,15 +1814,11 @@ void CABACWriter::smvd_mode( const PredictionUnit& pu ) void CABACWriter::subblock_merge_flag( const CodingUnit& cu ) { - if ( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) ) - { - return; - } - if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) + if ( !cu.cs->slice->isIntra() && (cu.slice->getPicHeader()->getMaxNumAffineMergeCand() > 0) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) { unsigned ctxId = DeriveCtx::CtxAffineFlag( cu ); - 
m_BinEncoder.encodeBin( cu.affine, Ctx::AffineFlag( ctxId ) ); + m_BinEncoder.encodeBin( cu.affine, Ctx::SubblockMergeFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "subblock_merge_flag() subblock_merge_flag=%d ctx=%d pos=(%d,%d)\n", cu.affine ? 1 : 0, ctxId, cu.Y().x, cu.Y().y ); } } @@ -1430,17 +1846,52 @@ void CABACWriter::merge_flag( const PredictionUnit& pu ) DTRACE( g_trace_ctx, D_SYNTAX, "merge_flag() merge=%d pos=(%d,%d) size=%dx%d\n", pu.mergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height ); - if (pu.mergeFlag && CU::isIBC(*pu.cu)) +} + +void CABACWriter::merge_data(const PredictionUnit& pu) +{ + if (CU::isIBC(*pu.cu)) + { + merge_idx(pu); + return; + } + subblock_merge_flag(*pu.cu); + if (pu.cu->affine) { + merge_idx(pu); return; } -#if !JVET_MMVD_OFF_MACRO - if (pu.mergeFlag) + const bool triangleAvailable = pu.cu->cs->slice->getSPS()->getUseTriangle() && pu.cu->cs->slice->isInterB() && pu.cu->cs->picHeader->getMaxNumTriangleCand() > 1; + const bool ciipAvailable = pu.cs->sps->getUseCiip() && !pu.cu->skip && pu.cu->lwidth() < MAX_CU_SIZE && pu.cu->lheight() < MAX_CU_SIZE; + if (pu.cu->lwidth() * pu.cu->lheight() >= 64 + && (triangleAvailable || ciipAvailable)) { - m_BinEncoder.encodeBin(pu.mmvdMergeFlag, Ctx::MmvdFlag(0)); - DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); + m_BinEncoder.encodeBin(pu.regularMergeFlag, Ctx::RegularMergeFlag(pu.cu->skip ? 0 : 1)); + } + if (pu.regularMergeFlag) + { + if (pu.cs->sps->getUseMMVD()) + { + m_BinEncoder.encodeBin(pu.mmvdMergeFlag, Ctx::MmvdFlag(0)); + DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 
1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); + } + if (pu.mmvdMergeFlag || pu.cu->mmvdSkip) + { + mmvd_merge_idx(pu); + } + else + { + merge_idx(pu); + } + } + else + { + if (triangleAvailable && ciipAvailable) + { + Ciip_flag(pu); + } + merge_idx(pu); } -#endif } void CABACWriter::imv_mode( const CodingUnit& cu ) @@ -1462,15 +1913,22 @@ void CABACWriter::imv_mode( const CodingUnit& cu ) return; } - unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); if (CU::isIBC(cu) == false) - m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), ctxId ); + m_BinEncoder.encodeBin( (cu.imv > 0), Ctx::ImvFlag( 0 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), 0 ); if( sps->getAMVREnabledFlag() && cu.imv > 0 ) { - m_BinEncoder.encodeBin( ( cu.imv > 1 ), Ctx::ImvFlag( 3 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", ( cu.imv > 1 ), 3 ); + if (!CU::isIBC(cu)) + { + m_BinEncoder.encodeBin(cu.imv < IMV_HPEL, Ctx::ImvFlag(4)); + DTRACE(g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", cu.imv < 3, 4); + } + if (cu.imv < IMV_HPEL) + { + m_BinEncoder.encodeBin( (cu.imv > 1), Ctx::ImvFlag( 1 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 1), 1 ); + } } DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() IMVFlag=%d\n", cu.imv ); @@ -1490,13 +1948,13 @@ void CABACWriter::affine_amvr_mode( const CodingUnit& cu ) return; } - m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( 4 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", ( cu.imv > 0 ), 4 ); + m_BinEncoder.encodeBin( (cu.imv > 0), Ctx::ImvFlag( 2 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", (cu.imv > 0), 2 ); if( cu.imv > 0 ) { - m_BinEncoder.encodeBin( ( cu.imv > 1 ), Ctx::ImvFlag( 5 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", ( cu.imv 
> 1 ), 5 ); + m_BinEncoder.encodeBin( (cu.imv > 1), Ctx::ImvFlag( 3 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", (cu.imv > 1), 3 ); } DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() IMVFlag=%d\n", cu.imv ); } @@ -1506,7 +1964,7 @@ void CABACWriter::merge_idx( const PredictionUnit& pu ) if ( pu.cu->affine ) { - int numCandminus1 = int( pu.cs->slice->getMaxNumAffineMergeCand() ) - 1; + int numCandminus1 = int( pu.cs->picHeader->getMaxNumAffineMergeCand() ) - 1; if ( numCandminus1 > 0 ) { if ( pu.mergeIdx == 0 ) @@ -1543,6 +2001,11 @@ void CABACWriter::merge_idx( const PredictionUnit& pu ) candIdx1 -= candIdx1 < candIdx0 ? 0 : 1; auto encodeOneIdx = [this](uint8_t mrgIdx, int numCandminus1) { + if (numCandminus1 == 0) + { + CHECK(mrgIdx, "Incorrect index!"); + return; + } if(mrgIdx == 0) { this->m_BinEncoder.encodeBin( 0, Ctx::MergeIdx() ); @@ -1562,11 +2025,19 @@ void CABACWriter::merge_idx( const PredictionUnit& pu ) } }; m_BinEncoder.encodeBinEP(splitDir); - encodeOneIdx(candIdx0, TRIANGLE_MAX_NUM_UNI_CANDS - 1); - encodeOneIdx(candIdx1, TRIANGLE_MAX_NUM_UNI_CANDS - 2); + const int maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand(); + CHECK(maxNumTriangleCand < 2, "Incorrect max number of triangle candidates"); + CHECK(candIdx0 >= maxNumTriangleCand, "Incorrect candIdx0"); + CHECK(candIdx1 >= maxNumTriangleCand, "Incorrect candIdx1"); + encodeOneIdx(candIdx0, maxNumTriangleCand - 1); + encodeOneIdx(candIdx1, maxNumTriangleCand - 2); return; } - int numCandminus1 = int( pu.cs->slice->getMaxNumMergeCand() ) - 1; + int numCandminus1; + if (pu.cu->predMode == MODE_IBC) + numCandminus1 = int(pu.cs->picHeader->getMaxNumIBCMergeCand()) - 1; + else + numCandminus1 = int(pu.cs->picHeader->getMaxNumMergeCand()) - 1; if( numCandminus1 > 0 ) { if( pu.mergeIdx == 0 ) @@ -1599,25 +2070,11 @@ void CABACWriter::mmvd_merge_idx(const PredictionUnit& pu) var1 = (mvpIdx - (var0 * MMVD_MAX_REFINE_NUM)) / 4; var2 = mvpIdx - (var0 * 
MMVD_MAX_REFINE_NUM) - var1 * 4; - int numCandminus1_base = MMVD_BASE_MV_NUM - 1; - if (numCandminus1_base > 0) + if (pu.cs->picHeader->getMaxNumMergeCand() > 1) { - if (var0 == 0) - { - m_BinEncoder.encodeBin(0, Ctx::MmvdMergeIdx()); - } - else - { - m_BinEncoder.encodeBin(1, Ctx::MmvdMergeIdx()); - for (unsigned idx = 1; idx < numCandminus1_base; idx++) - { - m_BinEncoder.encodeBinEP(var0 == idx ? 0 : 1); - if (var0 == idx) - { - break; - } - } - } + static_assert(MMVD_BASE_MV_NUM == 2, ""); + assert(var0 < 2); + m_BinEncoder.encodeBin(var0, Ctx::MmvdMergeIdx()); } DTRACE(g_trace_ctx, D_SYNTAX, "base_mvp_idx() base_mvp_idx=%d\n", var0); @@ -1668,8 +2125,8 @@ void CABACWriter::inter_pred_idc( const PredictionUnit& pu ) m_BinEncoder.encodeBin( 0, Ctx::InterDir(ctxId) ); } } - m_BinEncoder.encodeBin( ( pu.interDir == 2 ), Ctx::InterDir( 4 ) ); - DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", pu.interDir, pu.lumaPos().x, pu.lumaPos().y ); + m_BinEncoder.encodeBin( ( pu.interDir == 2 ), Ctx::InterDir( 5 ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", pu.interDir, pu.lumaPos().x, pu.lumaPos().y ); } @@ -1728,152 +2185,23 @@ void CABACWriter::mvp_flag( const PredictionUnit& pu, RefPicList eRefList ) DTRACE( g_trace_ctx, D_SYNTAX, "mvpIdx(refList:%d)=%d\n", eRefList, pu.mvpIdx[eRefList] ); } -void CABACWriter::MHIntra_flag(const PredictionUnit& pu) +void CABACWriter::Ciip_flag(const PredictionUnit& pu) { - if (!pu.cs->sps->getUseMHIntra()) + if (!pu.cs->sps->getUseCiip()) { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra SPS"); + CHECK(pu.ciipFlag == true, "invalid Ciip SPS"); return; } if (pu.cu->skip) { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra and skip"); - return; - } - if (pu.mmvdMergeFlag) - { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra and mmvd"); - return; - } - if (pu.cu->affine) - { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra and affine"); - return; - } - if 
(pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE) - { - CHECK(pu.mhIntraFlag == true, "invalid MHIntra and blk"); - return; - } - m_BinEncoder.encodeBin(pu.mhIntraFlag, Ctx::MHIntraFlag()); - DTRACE(g_trace_ctx, D_SYNTAX, "MHIntra_flag() MHIntra=%d pos=(%d,%d) size=%dx%d\n", pu.mhIntraFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); -} - -void CABACWriter::MHIntra_luma_pred_modes(const CodingUnit& cu) -{ - if (!cu.Y().valid()) - { - return; - } - - const int numMPMs = 3; - int numBlocks = CU::getNumPUs(cu); - unsigned mpm_idxs[4]; - unsigned pred_modes[4]; - - const PredictionUnit* pu = cu.firstPU; - - unsigned mpm_pred[numMPMs]; - for (int k = 0; k < numBlocks; k++) - { - unsigned& mpm_idx = mpm_idxs[k]; - unsigned& pred_mode = pred_modes[k]; - - PU::getMHIntraMPMs(*pu, mpm_pred); - - pred_mode = pu->intraDir[0]; - - mpm_idx = numMPMs; - - for (int idx = 0; idx < numMPMs; idx++) - { - if (pred_mode == mpm_pred[idx]) - { - mpm_idx = idx; - break; - } - } - if (PU::getNarrowShape(pu->lwidth(), pu->lheight()) == 0) - { - m_BinEncoder.encodeBin(mpm_idx < numMPMs, Ctx::MHIntraPredMode()); - } - pu = pu->next; - } - - pu = cu.firstPU; - - // mpm_idx / rem_intra_luma_pred_mode - for (int k = 0; k < numBlocks; k++) - { - const unsigned& mpm_idx = mpm_idxs[k]; - if (mpm_idx < numMPMs) - { - m_BinEncoder.encodeBinEP(mpm_idx > 0); - if (mpm_idx) - { - m_BinEncoder.encodeBinEP(mpm_idx > 1); - } - } - DTRACE(g_trace_ctx, D_SYNTAX, "intra_luma_pred_modes() idx=%d pos=(%d,%d) mode=%d\n", k, pu->lumaPos().x, pu->lumaPos().y, pu->intraDir[0]); - pu = pu->next; - } -} - -void CABACWriter::triangle_mode( const CodingUnit& cu ) -{ - if( !cu.cs->slice->getSPS()->getUseTriangle() || !cu.cs->slice->isInterB() || cu.lwidth() * cu.lheight() < TRIANGLE_MIN_SIZE || cu.affine ) - { - return; - } - - if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip ) - { - return; - } - - if ( 
cu.firstPU->mhIntraFlag ) - { + CHECK(pu.ciipFlag == true, "invalid Ciip and skip"); return; } - - unsigned flag_idx = DeriveCtx::CtxTriangleFlag( cu ); - - m_BinEncoder.encodeBin( cu.triangle, Ctx::TriangleFlag(flag_idx) ); - - DTRACE( g_trace_ctx, D_SYNTAX, "triangle_mode() triangle_mode=%d pos=(%d,%d) size: %dx%d\n", cu.triangle, cu.Y().x, cu.Y().y, cu.lumaSize().width, cu.lumaSize().height ); + m_BinEncoder.encodeBin(pu.ciipFlag, Ctx::CiipFlag()); + DTRACE(g_trace_ctx, D_SYNTAX, "Ciip_flag() Ciip=%d pos=(%d,%d) size=%dx%d\n", pu.ciipFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height); } -//================================================================================ -// clause 7.3.8.7 -//-------------------------------------------------------------------------------- -// void pcm_samples( tu ) -//================================================================================ -void CABACWriter::pcm_samples( const TransformUnit& tu ) -{ - CHECK( !tu.cu->ipcm, "pcm mode expected" ); - - const SPS& sps = *tu.cu->cs->sps; - - const CodingStructure *cs = tu.cs; - const ChannelType chType = tu.chType; - - ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y; - ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? 
COMPONENT_Y : COMPONENT_Cr; - for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) ) - { - const CPelBuf samples = tu.getPcmbuf( compID ); - const unsigned sampleBits = sps.getPCMBitDepth( toChannelType(compID) ); - for( unsigned y = 0; y < samples.height; y++ ) - { - for( unsigned x = 0; x < samples.width; x++ ) - { - m_BinEncoder.encodeBinsPCM( samples.at(x, y), sampleBits ); - } - } - } - m_BinEncoder.restart(); -} @@ -1884,19 +2212,14 @@ void CABACWriter::pcm_samples( const TransformUnit& tu ) // bool split_transform_flag( split, depth ) // bool cbf_comp ( cbf, area, depth ) //================================================================================ - -void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType, const int subTuIdx ) +void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, const PartSplit ispType, const int subTuIdx ) { - ChromaCbfs chromaCbfsLastDepth; - chromaCbfsLastDepth.Cb = chromaCbfs.Cb; - chromaCbfsLastDepth.Cr = chromaCbfs.Cr; - const UnitArea& area = partitioner.currArea(); - int subTuCounter = subTuIdx; - const TransformUnit& tu = *cs.getTU( area.blocks[partitioner.chType].pos(), partitioner.chType, subTuIdx ); - const CodingUnit& cu = *tu.cu; - const unsigned trDepth = partitioner.currTrDepth; - const bool split = ( tu.depth > trDepth ); - const bool chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode && !split; + const UnitArea& area = partitioner.currArea(); + int subTuCounter = subTuIdx; + const TransformUnit& tu = *cs.getTU(area.blocks[partitioner.chType].pos(), partitioner.chType, subTuIdx); + const CodingUnit& cu = *tu.cu; + const unsigned trDepth = partitioner.currTrDepth; + const bool split = (tu.depth > trDepth); // split_transform_flag if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) @@ -1911,46 +2234,8 @@ void CABACWriter::transform_tree( 
const CodingStructure& cs, Partitioner& partit CHECK( split && !cu.ispMode, "transform split not allowed with QTBT" ); - // cbf_cb & cbf_cr - if( area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && ( !CS::isDualITree( cs ) || partitioner.chType == CHANNEL_TYPE_CHROMA ) && ( !cu.ispMode || chromaCbfISP ) ) - { - { - unsigned cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth; - if( trDepth == 0 || chromaCbfs.Cb || chromaCbfISP ) - { - chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth ); - if( !( cu.sbtInfo && trDepth == 1 ) ) - cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth ); - } - else - { - CHECK( TU::getCbfAtDepth( tu, COMPONENT_Cb, cbfDepth ) != chromaCbfs.Cb, "incorrect Cb cbf" ); - } - - if( trDepth == 0 || chromaCbfs.Cr || chromaCbfISP ) - { - chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth ); - if( !( cu.sbtInfo && trDepth == 1 ) ) - cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb ); - } - else - { - CHECK( TU::getCbfAtDepth( tu, COMPONENT_Cr, cbfDepth ) != chromaCbfs.Cr, "incorrect Cr cbf" ); - } - } - } - else if( CS::isDualITree( cs ) ) - { - chromaCbfs = ChromaCbfs( false ); - } - if( split ) { - if( area.chromaFormat != CHROMA_400 ) - { - chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth ); - chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth ); - } if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { @@ -1974,8 +2259,7 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit do { - ChromaCbfs subChromaCbfs = chromaCbfs; - transform_tree( cs, partitioner, cuCtx, subChromaCbfs, ispType, subTuCounter ); + transform_tree( cs, partitioner, cuCtx, ispType, subTuCounter ); subTuCounter += subTuCounter != -1 ? 
1 : 0; } while( partitioner.nextPart( cs ) ); @@ -1985,65 +2269,30 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit { DTRACE( g_trace_ctx, D_SYNTAX, "transform_unit() pos=(%d,%d) size=%dx%d depth=%d trDepth=%d\n", tu.blocks[tu.chType].x, tu.blocks[tu.chType].y, tu.blocks[tu.chType].width, tu.blocks[tu.chType].height, cu.depth, partitioner.currTrDepth ); - if( !isChroma( partitioner.chType ) ) - { - if( !CU::isIntra( cu ) && trDepth == 0 && !chromaCbfs.sigChroma( area.chromaFormat ) ) - { - CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter units with no chroma coeffs" ); - } - else if( cu.sbtInfo && tu.noResidual ) - { - CHECK( TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be false for inter sbt no-residual tu" ); - } - else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) ) - { - assert( !tu.noResidual ); - CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter sbt residual tu" ); - } - else - { - bool previousCbf = false; - bool rootCbfSoFar = false; - bool lastCbfIsInferred = false; - if( cu.ispMode ) - { - uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? 
cu.lheight() >> g_aucLog2[tu.lheight()] : cu.lwidth() >> g_aucLog2[tu.lwidth()]; - if( subTuCounter == nTus - 1 ) - { - TransformUnit* tuPointer = cu.firstTU; - for( int tuIdx = 0; tuIdx < subTuCounter; tuIdx++ ) - { - rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, trDepth ); - tuPointer = tuPointer->next; - } - if( !rootCbfSoFar ) - { - lastCbfIsInferred = true; - } - } - if( !lastCbfIsInferred ) - { - previousCbf = TU::getPrevTuCbfAtDepth( tu, COMPONENT_Y, partitioner.currTrDepth ); - } - } - if( !lastCbfIsInferred ) - { - cbf_comp( cs, TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), tu.Y(), trDepth, previousCbf, cu.ispMode ); - } - } - } - - - transform_unit( tu, cuCtx, chromaCbfs ); + transform_unit( tu, cuCtx, partitioner, subTuCounter); } } -void CABACWriter::cbf_comp( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbCbf, const bool useISP ) +void CABACWriter::cbf_comp( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbf, const bool useISP ) { - const unsigned ctxId = DeriveCtx::CtxQtCbf( area.compID, depth, prevCbCbf, useISP && isLuma(area.compID) ); + unsigned ctxId = DeriveCtx::CtxQtCbf(area.compID, prevCbf, useISP && isLuma(area.compID)); const CtxSet& ctxSet = Ctx::QtCbf[ area.compID ]; + if ((area.compID == COMPONENT_Y && cs.getCU(area.pos(), ChannelType(area.compID))->bdpcmMode) + || (area.compID != COMPONENT_Y && cs.getCU(area.pos(), ChannelType(area.compID)) != NULL && cs.getCU(area.pos(), ChannelType(area.compID))->bdpcmModeChroma)) + { + if (area.compID == COMPONENT_Y) + ctxId = 1; + else if (area.compID == COMPONENT_Cb) + ctxId = 1; + else + ctxId = 2; + m_BinEncoder.encodeBin(cbf, ctxSet(ctxId)); + } + else + { m_BinEncoder.encodeBin( cbf, ctxSet( ctxId ) ); + } DTRACE( g_trace_ctx, D_SYNTAX, "cbf_comp() etype=%d pos=(%d,%d) ctx=%d cbf=%d\n", area.compID, area.x, area.y, ctxId, cbf ); } @@ -2062,14 +2311,20 @@ void CABACWriter::mvd_coding( const Mv 
&rMvd, int8_t imv ) int verMvd = rMvd.getVer(); if ( imv > 0 ) { - CHECK( (horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 4" ); - horMvd >>= 2; - verMvd >>= 2; - if( imv == 2 )//IMV_4PEL + CHECK((horMvd % 2) != 0 && (verMvd % 2) != 0, "IMV: MVD is not a multiple of 2"); + horMvd >>= 1; + verMvd >>= 1; + if (imv < IMV_HPEL) { - CHECK( (horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 8" ); - horMvd >>= 2; - verMvd >>= 2; + CHECK((horMvd % 2) != 0 && (verMvd % 2) != 0, "IMV: MVD is not a multiple of 4"); + horMvd >>= 1; + verMvd >>= 1; + if (imv == IMV_4PEL)//IMV_4PEL + { + CHECK((horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 16"); + horMvd >>= 2; + verMvd >>= 2; + } } } unsigned horAbs = unsigned( horMvd < 0 ? -horMvd : horMvd ); @@ -2095,7 +2350,7 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv ) { if( horAbs > 1 ) { - exp_golomb_eqprob( horAbs - 2, 1 ); + m_BinEncoder.encodeRemAbsEP(horAbs - 2, 1, 0, MV_BITS - 1); } m_BinEncoder.encodeBinEP( (horMvd < 0) ); } @@ -2103,7 +2358,7 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv ) { if( verAbs > 1 ) { - exp_golomb_eqprob( verAbs - 2, 1 ); + m_BinEncoder.encodeRemAbsEP(verAbs - 2, 1, 0, MV_BITS - 1); } m_BinEncoder.encodeBinEP( (verMvd < 0) ); } @@ -2119,10 +2374,90 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv ) // void cu_qp_delta ( cu ) // void cu_chroma_qp_offset ( cu ) //================================================================================ - -void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ) +void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, Partitioner& partitioner, const int subTuCounter) { - CodingUnit& cu = *tu.cu; + const CodingStructure& cs = *tu.cs; + const CodingUnit& cu = *tu.cu; + const UnitArea& area = partitioner.currArea(); + const unsigned trDepth = partitioner.currTrDepth; + const bool chromaCbfISP = 
area.blocks[COMPONENT_Cb].valid() && cu.ispMode; + ChromaCbfs chromaCbfs; + CHECK(tu.depth != trDepth, " transform unit should be not be futher partitioned"); + + // cbf_cb & cbf_cr + if (area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && (!cu.isSepTree() || partitioner.chType == CHANNEL_TYPE_CHROMA) && (!cu.ispMode || chromaCbfISP)) + { + { + unsigned cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth; + { + chromaCbfs.Cb = TU::getCbfAtDepth(tu, COMPONENT_Cb, trDepth); + //if (!(cu.sbtInfo && trDepth == 1)) + if (!(cu.sbtInfo && tu.noResidual)) + cbf_comp(cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth); + } + + { + chromaCbfs.Cr = TU::getCbfAtDepth(tu, COMPONENT_Cr, trDepth); + //if (!(cu.sbtInfo && trDepth == 1)) + if (!(cu.sbtInfo && tu.noResidual)) + cbf_comp(cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb); + } + } + } + else if (cu.isSepTree()) + { + chromaCbfs = ChromaCbfs(false); + } + + if (!isChroma(partitioner.chType)) + { + if (!CU::isIntra(cu) && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat)) + { + CHECK(!TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be true for inter units with no chroma coeffs"); + } + else if (cu.sbtInfo && tu.noResidual) + { + CHECK(TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be false for inter sbt no-residual tu"); + } + else if (cu.sbtInfo && !chromaCbfs.sigChroma(area.chromaFormat)) + { + assert(!tu.noResidual); + CHECK(!TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be true for inter sbt residual tu"); + } + else + { + bool lumaCbfIsInferredACT = (cu.colorTransform && cu.predMode == MODE_INTRA && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat)); + CHECK(lumaCbfIsInferredACT && !TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "adaptive color transform cannot have all zero coefficients"); + bool lastCbfIsInferred = lumaCbfIsInferredACT; // ISP and ACT are mutually exclusive + bool previousCbf = false; + bool rootCbfSoFar = 
false; + if (cu.ispMode) + { + uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> floorLog2(tu.lheight()) : cu.lwidth() >> floorLog2(tu.lwidth()); + if (subTuCounter == nTus - 1) + { + TransformUnit* tuPointer = cu.firstTU; + for (int tuIdx = 0; tuIdx < subTuCounter; tuIdx++) + { + rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, trDepth); + tuPointer = tuPointer->next; + } + if (!rootCbfSoFar) + { + lastCbfIsInferred = true; + } + } + if (!lastCbfIsInferred) + { + previousCbf = TU::getPrevTuCbfAtDepth(tu, COMPONENT_Y, partitioner.currTrDepth); + } + } + if (!lastCbfIsInferred) + { + cbf_comp(cs, TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), tu.Y(), trDepth, previousCbf, cu.ispMode); + } + } + } bool lumaOnly = ( cu.chromaFormat == CHROMA_400 || !tu.blocks[COMPONENT_Cb].valid() ); bool cbf[3] = { TU::getCbf( tu, COMPONENT_Y ), chromaCbfs.Cb, chromaCbfs.Cr }; bool cbfLuma = ( cbf[ COMPONENT_Y ] != 0 ); @@ -2137,25 +2472,37 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaC } cbfChroma = ( cbf[ COMPONENT_Cb ] || cbf[ COMPONENT_Cr ] ); } - if( cbfLuma || cbfChroma ) + + if( ( cu.lwidth() > 64 || cu.lheight() > 64 || cbfLuma || cbfChroma ) && + (!tu.cu->isSepTree() || isLuma(tu.chType)) ) { if( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded ) { - if (!CS::isDualITree(*tu.cs) || isLuma(tu.chType)) - { - cu_qp_delta(cu, cuCtx.qp, cu.qp); - cuCtx.qp = cu.qp; - cuCtx.isDQPCoded = true; - } + cu_qp_delta(cu, cuCtx.qp, cu.qp); + cuCtx.qp = cu.qp; + cuCtx.isDQPCoded = true; } - if( cu.cs->slice->getUseChromaQpAdj() && cbfChroma && !cu.transQuantBypass && !cuCtx.isChromaQpAdjCoded ) + } + if (!cu.isSepTree() || isChroma(tu.chType)) // !DUAL_TREE_LUMA + { + SizeType channelWidth = !cu.isSepTree() ? cu.lwidth() : cu.chromaSize().width; + SizeType channelHeight = !cu.isSepTree() ? 
cu.lheight() : cu.chromaSize().height; + + if (cu.cs->slice->getUseChromaQpAdj() && (channelWidth > 64 || channelHeight > 64 || cbfChroma) && !cuCtx.isChromaQpAdjCoded) { - cu_chroma_qp_offset( cu ); + cu_chroma_qp_offset(cu); cuCtx.isChromaQpAdjCoded = true; } + } + + if( !lumaOnly ) + { + joint_cb_cr( tu, ( cbf[COMPONENT_Cb] ? 2 : 0 ) + ( cbf[COMPONENT_Cr] ? 1 : 0 ) ); + } + if( cbfLuma ) { - residual_coding( tu, COMPONENT_Y ); + residual_coding( tu, COMPONENT_Y, &cuCtx ); } if( !lumaOnly ) { @@ -2167,8 +2514,7 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaC } if( cbf[ compID ] ) { - residual_coding( tu, compID ); - } + residual_coding( tu, compID, &cuCtx ); } } } @@ -2208,7 +2554,7 @@ void CABACWriter::cu_chroma_qp_offset( const CodingUnit& cu ) else { m_BinEncoder.encodeBin( 1, Ctx::ChromaQpAdjFlag() ); - int length = cu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListLen(); + int length = cu.cs->pps->getChromaQpOffsetListLen(); if( length > 1 ) { unary_max_symbol( qpAdj-1, Ctx::ChromaQpAdjIdc(), Ctx::ChromaQpAdjIdc(), length-1 ); @@ -2230,19 +2576,41 @@ void CABACWriter::cu_chroma_qp_offset( const CodingUnit& cu ) // void residual_coding_subblock( coeffCtx ) //================================================================================ -void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) +void CABACWriter::joint_cb_cr( const TransformUnit& tu, const int cbfMask ) +{ + if ( !tu.cu->slice->getSPS()->getJointCbCrEnabledFlag() ) + { + return; + } + + CHECK( tu.jointCbCr && tu.jointCbCr != cbfMask, "wrong value of jointCbCr (" << (int)tu.jointCbCr << " vs " << (int)cbfMask << ")" ); + if( ( CU::isIntra( *tu.cu ) && cbfMask ) || ( cbfMask == 3 ) ) + { + m_BinEncoder.encodeBin( tu.jointCbCr ? 
1 : 0, Ctx::JointCbCrFlag( cbfMask - 1 ) ); + } +} + +void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, CUCtx* cuCtx ) { const CodingUnit& cu = *tu.cu; DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode ); + if( compID == COMPONENT_Cr && tu.jointCbCr == 3 ) + return; + // code transform skip and explicit rdpcm mode - mts_coding ( tu, compID ); + ts_flag ( tu, compID ); explicit_rdpcm_mode( tu, compID ); -#if HEVC_USE_SIGN_HIDING + if (tu.mtsIdx[compID] == MTS_SKIP) + { + residual_codingTS( tu, compID ); + return; + } + // determine sign hiding - bool signHiding = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF ); - if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 ) + bool signHiding = ( cu.cs->picHeader->getSignDataHidingEnabledFlag() && tu.rdpcm[compID] == RDPCM_OFF ); + if( signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx[compID] == MTS_SKIP) { const ChannelType chType = toChannelType( compID ); const unsigned intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType ); @@ -2251,14 +2619,9 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) signHiding = false; } } -#endif // init coeff coding context -#if HEVC_USE_SIGN_HIDING CoeffCodingContext cctx ( tu, compID, signHiding ); -#else - CoeffCodingContext cctx ( tu, compID ); -#endif const TCoeff* coeff = tu.getCoeffs( compID ).buf; // determine and set last coeff position and sig group flags @@ -2276,17 +2639,38 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) CHECK( scanPosLast < 0, "Coefficient coding called for empty TU" ); cctx.setScanPosLast(scanPosLast); + if (cuCtx && tu.mtsIdx[compID] != MTS_SKIP && 
tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4) + { + const int maxLfnstPos = ((tu.blocks[compID].height == 4 && tu.blocks[compID].width == 4) || (tu.blocks[compID].height == 8 && tu.blocks[compID].width == 8)) ? 7 : 15; + cuCtx->violatesLfnstConstrained[ toChannelType(compID) ] |= cctx.scanPosLast() > maxLfnstPos; + } + if (cuCtx && tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4) + { + const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA; + cuCtx->lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh; + } +#if !JVET_Q0055_MTS_SIGNALLING + if( cuCtx && isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) ) + { + cuCtx->violatesMtsCoeffConstraint = true; + } +#endif + // code last coeff position last_sig_coeff( cctx, tu, compID ); // code subblocks - const int stateTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 ); + const int stateTab = ( tu.cs->picHeader->getDepQuantEnabledFlag() ? 32040 : 0 ); int state = 0; + int ctxBinSampleRatio = (compID == COMPONENT_Y) ? 
MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4; + for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--) { cctx.initSubblock ( subSetId, sigGroupFlags[subSetId] ); - if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) + + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && compID == COMPONENT_Y ) { if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) ) @@ -2295,80 +2679,74 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID ) } } residual_coding_subblock( cctx, coeff, stateTab, state ); - + +#if JVET_Q0055_MTS_SIGNALLING + if ( cuCtx && isLuma(compID) && cctx.isSigGroup() && ( cctx.cgPosY() > 3 || cctx.cgPosX() > 3 ) ) + { + cuCtx->violatesMtsCoeffConstraint = true; + } +#endif } - - } -void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID ) +void CABACWriter::ts_flag( const TransformUnit& tu, ComponentID compID ) { - const CodingUnit &cu = *tu.cu; - const bool tsAllowed = TU::isTSAllowed ( tu, compID ); - const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); - - if( !mtsAllowed && !tsAllowed ) return; - - int symbol = 0; - int ctxIdx = 0; - - if( tsAllowed ) + int tsFlag = tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; + int ctxIdx = isLuma(compID) ? 0 : 1; + + if( TU::isTSAllowed ( tu, compID ) ) { - symbol = 1 - ( tu.mtsIdx == 1 ? 
1 : 0 ); - ctxIdx = 6; - m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); + m_BinEncoder.encodeBin( tsFlag, Ctx::TransformSkipFlag(ctxIdx)); } + DTRACE( g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag ); +} - if( tu.mtsIdx != 1 ) +void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx ) +{ + TransformUnit &tu = *cu.firstTU; + int mtsIdx = tu.mtsIdx[COMPONENT_Y]; + + if( CU::isMTSAllowed( cu, COMPONENT_Y ) && cuCtx && !cuCtx->violatesMtsCoeffConstraint && + cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) ) { - if( mtsAllowed ) + int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0; + int ctxIdx = 0; + + m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx)); + + if( symbol ) { - symbol = tu.mtsIdx != 0 ? 1 : 0; - ctxIdx = std::min( (int)cu.qtDepth, 5 ); - m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); - - if( symbol ) + ctxIdx = 1; + for( int i = 0; i < 3; i++, ctxIdx++ ) { - ctxIdx = 7; - for( int i = 0; i < 3; i++, ctxIdx++ ) + symbol = mtsIdx > i + MTS_DST7_DST7 ? 1 : 0; + m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx)); + + if( !symbol ) { - symbol = tu.mtsIdx > i + 2 ? 
1 : 0; - m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) ); - - if( !symbol ) - { - break; - } + break; } } } } - - DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx ); + DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); } void CABACWriter::isp_mode( const CodingUnit& cu ) { - if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || cu.ipcm ) + if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform ) { - CHECK( cu.ispMode != NOT_INTRA_SUBPARTITIONS, "error: cu.intraSubPartitions != 0" ); + CHECK( cu.ispMode != NOT_INTRA_SUBPARTITIONS, "cu.ispMode != 0" ); return; } - const ISPType allowedSplits = CU::canUseISPSplit( cu, getFirstComponentOfChannel( cu.chType ) ); - if( allowedSplits == NOT_INTRA_SUBPARTITIONS ) return; - - if( cu.ispMode == NOT_INTRA_SUBPARTITIONS ) + if ( cu.ispMode == NOT_INTRA_SUBPARTITIONS ) { m_BinEncoder.encodeBin( 0, Ctx::ISPMode( 0 ) ); } else { m_BinEncoder.encodeBin( 1, Ctx::ISPMode( 0 ) ); - - if( allowedSplits == CAN_USE_VER_AND_HORL_SPLITS ) - { - m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) ); - } + m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) ); } DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode ); } @@ -2376,7 +2754,7 @@ void CABACWriter::isp_mode( const CodingUnit& cu ) void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID compID ) { const CodingUnit& cu = *tu.cu; - if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) ) + if (!CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && (tu.mtsIdx[compID] == MTS_SKIP)) { ChannelType 
chType = toChannelType( compID ); switch( tu.rdpcm[compID] ) @@ -2395,19 +2773,55 @@ void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID comp } } +void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) +{ + int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0; + if( ( cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) || + (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || + ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 ) + || ( cu.blocks[ chIdx ].lumaSize().width > cu.cs->sps->getMaxTbSize() || cu.blocks[ chIdx ].lumaSize().height > cu.cs->sps->getMaxTbSize() ) + ) + { + return; + } + + if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) ) + { + const bool lumaFlag = cu.isSepTree() ? ( isLuma( cu.chType ) ? true : false ) : true; + const bool chromaFlag = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true; + bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] ); + + const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; + if( (!cuCtx.lfnstLastScanPos && !cu.ispMode) || nonZeroCoeffNonTsCorner8x8 || isTrSkip ) + { + return; + } + } + else + { + return; + } + + + unsigned cctx = 0; + if ( cu.isSepTree() ) cctx++; + + const uint32_t idxLFNST = cu.lfnstIdx; + assert( idxLFNST < 3 ); + m_BinEncoder.encodeBin( idxLFNST ? 1 : 0, Ctx::LFNSTIdx( cctx ) ); + + if( idxLFNST ) + { + m_BinEncoder.encodeBin( (idxLFNST - 1) ? 
1 : 0, Ctx::LFNSTIdx(2)); + } + + DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx ); +} void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit& tu, ComponentID compID ) { unsigned blkPos = cctx.blockPos( cctx.scanPosLast() ); unsigned posX, posY; -#if HEVC_USE_MDCS - if( cctx.scanType() == SCAN_VER ) - { - posX = blkPos / cctx.width(); - posY = blkPos - ( posX * cctx.width() ); - } - else -#endif { posY = blkPos / cctx.width(); posX = blkPos - ( posY * cctx.width() ); @@ -2420,7 +2834,7 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit& unsigned maxLastPosX = cctx.maxLastPosX(); unsigned maxLastPosY = cctx.maxLastPosY(); - if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y ) + if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y ) { maxLastPosX = ( tu.blocks[compID].width == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX; maxLastPosY = ( tu.blocks[compID].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY; @@ -2488,15 +2902,12 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe //===== encode absolute values ===== const int inferSigPos = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos; -#if HEVC_USE_SIGN_HIDING int firstNZPos = nextSigPos; int lastNZPos = -1; -#endif int remAbsLevel = -1; int numNonZero = 0; unsigned signPattern = 0; - bool is2x2subblock = ( cctx.log2CGSize() == 2 ); - int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ); + int remRegBins = cctx.regBinLimit; int firstPosMode2 = minSubPos - 1; for( ; nextSigPos >= minSubPos && remRegBins >= 4; nextSigPos-- ) @@ -2510,16 +2921,18 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); remRegBins--; } + else if( nextSigPos != cctx.scanPosLast() ) + { + cctx.sigCtxIdAbs( nextSigPos, coeff, state ); // required for setting variables that are needed for gtx/par context selection + } if( sigFlag ) { uint8_t& ctxOff = ctxOffset[ nextSigPos - minSubPos ]; ctxOff = cctx.ctxOffsetAbs(); numNonZero++; -#if HEVC_USE_SIGN_HIDING firstNZPos = nextSigPos; lastNZPos = std::max<int>( lastNZPos, nextSigPos ); -#endif remAbsLevel = abs( Coeff ) - 1; if( nextSigPos != cctx.scanPosLast() ) signPattern <<= 1; @@ -2548,23 +2961,21 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe state = ( stateTransTable >> ((state<<2)+((Coeff&1)<<1)) ) & 3; } firstPosMode2 = nextSigPos; - + cctx.regBinLimit = remRegBins; //===== 2nd PASS: Go-rice codes ===== unsigned ricePar = 0; for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- ) { + int sumAll = cctx.templateAbsSum(scanPos, coeff, 4); + ricePar = g_auiGoRiceParsCoeff[sumAll]; unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); if( absLevel >= 4 ) { unsigned rem = ( absLevel - 4 ) >> 1; - m_BinEncoder.encodeRemAbsEP( rem, ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + m_BinEncoder.encodeRemAbsEP( rem, ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); - if( ricePar < 3 && rem > (3<<ricePar)-1 ) - { - ricePar++; - } } } @@ -2573,26 +2984,24 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe { TCoeff Coeff = coeff[ cctx.blockPos( scanPos ) ]; unsigned absLevel 
= abs( Coeff ); - int sumAll = cctx.templateAbsSum(scanPos, coeff); + int sumAll = cctx.templateAbsSum(scanPos, coeff, 0); int rice = g_auiGoRiceParsCoeff [sumAll]; - int pos0 = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll]; + int pos0 = g_auiGoRicePosCoeff0(state, rice); unsigned rem = ( absLevel == 0 ? pos0 : absLevel <= pos0 ? absLevel-1 : absLevel ); - m_BinEncoder.encodeRemAbsEP( rem, rice, cctx.extPrec(), cctx.maxLog2TrDRange() ); + m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice ); state = ( stateTransTable >> ((state<<2)+((absLevel&1)<<1)) ) & 3; if( absLevel ) { numNonZero++; -#if HEVC_USE_SIGN_HIDING + firstNZPos = scanPos; lastNZPos = std::max<int>( lastNZPos, scanPos ); -#endif signPattern <<= 1; if( Coeff < 0 ) signPattern++; } } //===== encode sign's ===== -#if HEVC_USE_SIGN_HIDING unsigned numSigns = numNonZero; if( cctx.hideSign( firstNZPos, lastNZPos ) ) { @@ -2600,11 +3009,155 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe signPattern >>= 1; } m_BinEncoder.encodeBinsEP( signPattern, numSigns ); -#else - m_BinEncoder.encodeBinsEP( signPattern, numNonZero ); -#endif } +void CABACWriter::residual_codingTS( const TransformUnit& tu, ComponentID compID ) +{ + DTRACE( g_trace_ctx, D_SYNTAX, "residual_codingTS() etype=%d pos=(%d,%d) size=%dx%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height ); + + // init coeff coding context + CoeffCodingContext cctx ( tu, compID, false, isLuma(compID) ? 
tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma); + const TCoeff* coeff = tu.getCoeffs( compID ).buf; + int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; + cctx.setNumCtxBins(maxCtxBins); + + // determine and set last coeff position and sig group flags + std::bitset<MLS_GRP_NUM> sigGroupFlags; + for( int scanPos = 0; scanPos < cctx.maxNumCoeff(); scanPos++) + { + unsigned blkPos = cctx.blockPos( scanPos ); + if( coeff[blkPos] ) + { + sigGroupFlags.set( scanPos >> cctx.log2CGSize() ); + } + } + + // code subblocks + for( int subSetId = 0; subSetId <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); subSetId++ ) + { + cctx.initSubblock ( subSetId, sigGroupFlags[subSetId] ); + residual_coding_subblockTS( cctx, coeff ); + } +} + +void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TCoeff* coeff ) +{ + //===== init ===== + const int minSubPos = cctx.maxSubPos(); + int firstSigPos = cctx.minSubPos(); + int nextSigPos = firstSigPos; + + //===== encode significant_coeffgroup_flag ===== + if( !cctx.isLastSubSet() || !cctx.only1stSigGroup() ) + { + if( cctx.isSigGroup() ) + { + m_BinEncoder.encodeBin( 1, cctx.sigGroupCtxId( true ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 1, cctx.sigGroupCtxId() ); + } + else + { + m_BinEncoder.encodeBin( 0, cctx.sigGroupCtxId( true ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 0, cctx.sigGroupCtxId() ); + return; + } + } + + //===== encode absolute values ===== + const int inferSigPos = minSubPos; + int remAbsLevel = -1; + int numNonZero = 0; + + int rightPixel, belowPixel, modAbsCoeff; + + int lastScanPosPass1 = -1; + int lastScanPosPass2 = -1; + for (; nextSigPos <= minSubPos && cctx.numCtxBins() >= 4; nextSigPos++) + { + TCoeff Coeff = coeff[ cctx.blockPos( nextSigPos ) ]; + unsigned sigFlag = ( Coeff != 0 ); + if( numNonZero || nextSigPos != inferSigPos ) + { + const unsigned sigCtxId = cctx.sigCtxIdAbsTS( nextSigPos, coeff ); + m_BinEncoder.encodeBin( 
sigFlag, sigCtxId ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); + cctx.decimateNumCtxBins(1); + } + + if( sigFlag ) + { + //===== encode sign's ===== + int sign = Coeff < 0; + const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm()); + m_BinEncoder.encodeBin(sign, signCtxId); + cctx.decimateNumCtxBins(1); + numNonZero++; + cctx.neighTS(rightPixel, belowPixel, nextSigPos, coeff); + modAbsCoeff = cctx.deriveModCoeff(rightPixel, belowPixel, abs(Coeff), cctx.bdpcm()); + remAbsLevel = modAbsCoeff - 1; + + unsigned gt1 = !!remAbsLevel; + const unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm()); + m_BinEncoder.encodeBin(gt1, gt1CtxId); + DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1, gt1CtxId); + cctx.decimateNumCtxBins(1); + + if( gt1 ) + { + remAbsLevel -= 1; + m_BinEncoder.encodeBin( remAbsLevel&1, cctx.parityCtxIdAbsTS() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_par_flag() bin=%d ctx=%d\n", remAbsLevel&1, cctx.parityCtxIdAbsTS() ); + cctx.decimateNumCtxBins(1); + } + } + lastScanPosPass1 = nextSigPos; + } + + int cutoffVal = 2; + int numGtBins = 4; + for (int scanPos = firstSigPos; scanPos <= minSubPos && cctx.numCtxBins() >= 4; scanPos++) + { + unsigned absLevel; + cctx.neighTS(rightPixel, belowPixel, scanPos, coeff); + absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm()); + cutoffVal = 2; + for (int i = 0; i < numGtBins; i++) + { + if (absLevel >= cutoffVal) + { + unsigned gt2 = (absLevel >= (cutoffVal + 2)); + m_BinEncoder.encodeBin(gt2, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1)); + DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt%d_flag() bin=%d ctx=%d sp=%d coeff=%d\n", i, gt2, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1), scanPos, min<int>(absLevel, cutoffVal + 2)); + cctx.decimateNumCtxBins(1); + } + cutoffVal += 2; + } + lastScanPosPass2 = scanPos; + } + + //===== coeff bypass ==== + for( int scanPos = 
firstSigPos; scanPos <= minSubPos; scanPos++ ) + { + unsigned absLevel; + cctx.neighTS(rightPixel, belowPixel, scanPos, coeff); + cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 2 : 0)); + absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm()||!cutoffVal); + + if( absLevel >= cutoffVal ) + { + int rice = cctx.templateAbsSumTS( scanPos, coeff ); + unsigned rem = scanPos <= lastScanPosPass1 ? (absLevel - cutoffVal) >> 1 : absLevel; + m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos ); + + if (absLevel && scanPos > lastScanPosPass1) + { + int sign = coeff[cctx.blockPos(scanPos)] < 0; + m_BinEncoder.encodeBinEP(sign); + } + } + } +} @@ -2712,13 +3265,12 @@ void CABACWriter::exp_golomb_eqprob( unsigned symbol, unsigned count ) } bins <<= 1; numBins++; - bins = (bins << count) | symbol; - numBins += count; - CHECK(!( numBins <= 32 ), "Unspecified error"); - m_BinEncoder.encodeBinsEP( bins, numBins ); + //CHECK(!( numBins + count <= 32 ), "Unspecified error"); + m_BinEncoder.encodeBinsEP(bins, numBins); + m_BinEncoder.encodeBinsEP(symbol, count); } -void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channel, AlfSliceParam* alfParam) +void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channel, AlfParam* alfParam) { if( isLuma( channel ) ) { @@ -2733,7 +3285,7 @@ void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channe codeAlfCtuEnableFlags( cs, COMPONENT_Cr, alfParam ); } } -void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID, AlfSliceParam* alfParam) +void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID, AlfParam* alfParam) { uint32_t numCTUs = cs.pcv->sizeInCtus; @@ -2743,11 +3295,11 @@ void CABACWriter::codeAlfCtuEnableFlags( 
CodingStructure& cs, ComponentID compID } } -void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfSliceParam* alfParam) +void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfParam* alfParam) { - const AlfSliceParam& alfSliceParam = alfParam ? (*alfParam) : cs.aps->getAlfAPSParam(); + const bool alfComponentEnabled = (alfParam != NULL) ? alfParam->enabledFlag[compIdx] : cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx); - if( cs.sps->getALFEnabledFlag() && alfSliceParam.enabledFlag[compIdx] ) + if( cs.sps->getALFEnabledFlag() && alfComponentEnabled ) { const PreCalcValues& pcv = *cs.pcv; int frame_width_in_ctus = pcv.widthInCtus; @@ -2755,27 +3307,165 @@ void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, int rx = ctuRsAddr - ry * frame_width_in_ctus; const Position pos( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight ); const uint32_t curSliceIdx = cs.slice->getIndependentSliceIdx(); -#if HEVC_TILES_WPP - const uint32_t curTileIdx = cs.picture->tileMap->getTileIdxMap( pos ); - bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, curTileIdx, CH_L ) ? true : false; - bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false; -#else - bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, CH_L ) ? true : false; - bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false; -#endif + const uint32_t curTileIdx = cs.pps->getTileIdx( pos ); + bool leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; + bool aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false; int leftCTUAddr = leftAvail ? 
ctuRsAddr - 1 : -1; int aboveCTUAddr = aboveAvail ? ctuRsAddr - frame_width_in_ctus : -1; - if( alfSliceParam.enabledFlag[compIdx] ) + uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ); + int ctx = 0; + ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0; + ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 1 : 0 ) : 0; + m_BinEncoder.encodeBin( ctbAlfFlag[ctuRsAddr], Ctx::ctbAlfFlag( compIdx * 3 + ctx ) ); + } +} + +void CABACWriter::code_unary_fixed( unsigned symbol, unsigned ctxId, unsigned unary_max, unsigned fixed ) +{ + bool unary = (symbol <= unary_max); + m_BinEncoder.encodeBin( unary, ctxId ); + if( unary ) + { + unary_max_eqprob( symbol, unary_max ); + } + else + { + m_BinEncoder.encodeBinsEP( symbol - unary_max - 1, fixed ); + } +} + +void CABACWriter::mip_flag( const CodingUnit& cu ) +{ + if( !cu.Y().valid() ) + { + return; + } + if( !cu.cs->sps->getUseMIP() ) + { + return; + } + + unsigned ctxId = DeriveCtx::CtxMipFlag( cu ); + m_BinEncoder.encodeBin( cu.mipFlag, Ctx::MipFlag( ctxId ) ); + DTRACE( g_trace_ctx, D_SYNTAX, "mip_flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.mipFlag ? 1 : 0 ); +} + +void CABACWriter::mip_pred_modes( const CodingUnit& cu ) +{ + if( !cu.Y().valid() ) + { + return; + } + for( const auto &pu : CU::traversePUs( cu ) ) + { + mip_pred_mode( pu ); + } +} + +void CABACWriter::mip_pred_mode( const PredictionUnit& pu ) +{ + m_BinEncoder.encodeBinEP( (pu.mipTransposedFlag ? 1 : 0) ); + + const int numModes = getNumModesMip( pu.Y() ); + CHECKD( pu.intraDir[CHANNEL_TYPE_LUMA] < 0 || pu.intraDir[CHANNEL_TYPE_LUMA] >= numModes, "Invalid MIP mode" ); + xWriteTruncBinCode( pu.intraDir[CHANNEL_TYPE_LUMA], numModes ); + + DTRACE( g_trace_ctx, D_SYNTAX, "mip_pred_mode() pos=(%d,%d) mode=%d transposed=%d\n", pu.lumaPos().x, pu.lumaPos().y, pu.intraDir[CHANNEL_TYPE_LUMA], pu.mipTransposedFlag ? 
1 : 0 ); +} + +void CABACWriter::codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr, bool alfEnableLuma) +{ + if ( (!cs.sps->getALFEnabledFlag()) || (!alfEnableLuma)) + { + return; + } + + uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag(COMPONENT_Y); + if (!ctbAlfFlag[ctuRsAddr]) + { + return; + } + + short* alfCtbFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex(); + const unsigned filterSetIdx = alfCtbFilterIndex[ctuRsAddr]; + unsigned numAps = cs.slice->getTileGroupNumAps(); + unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS; + if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS) + { + int useTemporalFilt = (filterSetIdx >= NUM_FIXED_FILTER_SETS) ? 1 : 0; + m_BinEncoder.encodeBin(useTemporalFilt, Ctx::AlfUseTemporalFilt()); + if (useTemporalFilt) + { + CHECK((filterSetIdx - NUM_FIXED_FILTER_SETS) >= (numAvailableFiltSets - NUM_FIXED_FILTER_SETS), "temporal non-latest set"); + if (numAps > 1) + { + xWriteTruncBinCode(filterSetIdx - NUM_FIXED_FILTER_SETS, numAvailableFiltSets - NUM_FIXED_FILTER_SETS); + } + } + else + { + CHECK(filterSetIdx >= NUM_FIXED_FILTER_SETS, "fixed set larger than temporal"); + xWriteTruncBinCode(filterSetIdx, NUM_FIXED_FILTER_SETS); + } + } + else + { + CHECK(filterSetIdx >= NUM_FIXED_FILTER_SETS, "fixed set numavail < num_fixed"); + xWriteTruncBinCode(filterSetIdx, NUM_FIXED_FILTER_SETS); + } +} +void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ChannelType channel, AlfParam* alfParam) +{ + if( isChroma( channel ) ) + { + if (alfParam->enabledFlag[COMPONENT_Cb]) + codeAlfCtuAlternatives( cs, COMPONENT_Cb, alfParam ); + if (alfParam->enabledFlag[COMPONENT_Cr]) + codeAlfCtuAlternatives( cs, COMPONENT_Cr, alfParam ); + } +} +void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ComponentID compID, AlfParam* alfParam) +{ + if( compID == COMPONENT_Y ) + return; + uint32_t numCTUs = cs.pcv->sizeInCtus; + uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compID ); + + 
for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ ) + { + if( ctbAlfFlag[ctuIdx] ) + { + codeAlfCtuAlternative( cs, ctuIdx, compID, alfParam ); + } + } +} + +void CABACWriter::codeAlfCtuAlternative( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, const AlfParam* alfParam) +{ + if( compIdx == COMPONENT_Y ) + return; + int apsIdx = alfParam ? 0 : cs.slice->getTileGroupApsIdChroma(); + const AlfParam& alfParamRef = alfParam ? (*alfParam) : cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam(); + + if( alfParam || (cs.sps->getALFEnabledFlag() && cs.slice->getTileGroupAlfEnabledFlag( (ComponentID)compIdx )) ) + { + uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ); + + if( ctbAlfFlag[ctuRsAddr] ) { - uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ); - int ctx = 0; - ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0; - ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 1 : 0 ) : 0; - m_BinEncoder.encodeBin( ctbAlfFlag[ctuRsAddr], Ctx::ctbAlfFlag( compIdx * 3 + ctx ) ); + const int numAlts = alfParamRef.numAlternativesChroma; + uint8_t* ctbAlfAlternative = cs.slice->getPic()->getAlfCtuAlternativeData( compIdx ); + unsigned numOnes = ctbAlfAlternative[ctuRsAddr]; + assert( ctbAlfAlternative[ctuRsAddr] < numAlts ); + for( int i = 0; i < numOnes; ++i ) + m_BinEncoder.encodeBin( 1, Ctx::ctbAlfAlternative( compIdx-1 ) ); + if( numOnes < numAlts-1 ) + m_BinEncoder.encodeBin( 0, Ctx::ctbAlfAlternative( compIdx-1 ) ); } } } + //! \} diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index bebfb8af099a70fe67191e6524c9207ac279cca7..940fa4c22be37adc3cf9c970d17187ee34b4dca9 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -74,7 +74,7 @@ public: void end_of_slice (); // coding tree unit (clause 7.3.8.2) - void coding_tree_unit ( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao = false ); + void coding_tree_unit ( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao = false, bool skipAlf = false ); // sao (clause 7.3.8.3) void sao ( const Slice& slice, unsigned ctuRsAddr ); @@ -83,16 +83,16 @@ public: // coding (quad)tree (clause 7.3.8.4) void coding_tree ( const CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr); void split_cu_mode ( const PartSplit split, const CodingStructure& cs, Partitioner& pm ); + void mode_constraint ( const PartSplit split, const CodingStructure& cs, Partitioner& pm, const ModeType modeType ); // coding unit (clause 7.3.8.5) void coding_unit ( const CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); - void cu_transquant_bypass_flag ( const CodingUnit& cu ); void cu_skip_flag ( const CodingUnit& cu ); void pred_mode ( const CodingUnit& cu ); - void pcm_data ( const CodingUnit& cu, Partitioner& pm ); - void pcm_flag ( const CodingUnit& cu, Partitioner& pm ); + void bdpcm_mode ( const CodingUnit& cu, const ComponentID compID ); + void cu_pred_data ( const CodingUnit& cu ); - void cu_gbi_flag ( const CodingUnit& cu ); + void cu_bcw_flag ( const CodingUnit& cu ); void extend_ref_line (const PredictionUnit& pu ); void extend_ref_line (const CodingUnit& cu ); void intra_luma_pred_modes ( const CodingUnit& cu ); @@ -102,12 +102,19 @@ public: void intra_chroma_pred_mode ( const PredictionUnit& pu ); void cu_residual ( const CodingUnit& cu, Partitioner& pm, CUCtx& cuCtx ); void rqt_root_cbf ( const CodingUnit& cu ); + void 
adaptive_color_transform(const CodingUnit& cu); void sbt_mode ( const CodingUnit& cu ); void end_of_ctu ( const CodingUnit& cu, CUCtx& cuCtx ); - + void mip_flag ( const CodingUnit& cu ); + void mip_pred_modes ( const CodingUnit& cu ); + void mip_pred_mode ( const PredictionUnit& pu ); + void cu_palette_info ( const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx); + void cuPaletteSubblockInfo ( const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType ); + Pel writePLTIndex ( const CodingUnit& cu, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin ); // prediction unit (clause 7.3.8.6) void prediction_unit ( const PredictionUnit& pu ); void merge_flag ( const PredictionUnit& pu ); + void merge_data ( const PredictionUnit& pu ); void affine_flag ( const CodingUnit& cu ); void subblock_merge_flag ( const CodingUnit& cu ); void merge_idx ( const PredictionUnit& pu ); @@ -118,55 +125,64 @@ public: void ref_idx ( const PredictionUnit& pu, RefPicList eRefList ); void mvp_flag ( const PredictionUnit& pu, RefPicList eRefList ); - void MHIntra_flag ( const PredictionUnit& pu ); - void MHIntra_luma_pred_modes ( const CodingUnit& cu ); - void triangle_mode ( const CodingUnit& cu ); + void Ciip_flag ( const PredictionUnit& pu ); void smvd_mode ( const PredictionUnit& pu ); - // pcm samples (clause 7.3.8.7) - void pcm_samples ( const TransformUnit& tu ); // transform tree (clause 7.3.8.8) - void transform_tree ( const CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 ); - void cbf_comp ( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbCbf = false, const bool useISP = false ); + void transform_tree ( const CodingStructure& cs, Partitioner& pm, CUCtx& cuCtx, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 
); + void cbf_comp ( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbf = false, const bool useISP = false ); // mvd coding (clause 7.3.8.9) void mvd_coding ( const Mv &rMvd, int8_t imv ); // transform unit (clause 7.3.8.10) - void transform_unit ( const TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs ); + void transform_unit ( const TransformUnit& tu, CUCtx& cuCtx, Partitioner& pm, const int subTuCounter = -1 ); void cu_qp_delta ( const CodingUnit& cu, int predQP, const int8_t qp ); void cu_chroma_qp_offset ( const CodingUnit& cu ); // residual coding (clause 7.3.8.11) - void residual_coding ( const TransformUnit& tu, ComponentID compID ); - void mts_coding ( const TransformUnit& tu, ComponentID compID ); + void residual_coding ( const TransformUnit& tu, ComponentID compID, CUCtx* cuCtx = nullptr ); + void ts_flag ( const TransformUnit& tu, ComponentID compID ); + void mts_idx ( const CodingUnit& cu, CUCtx* cuCtx ); + void residual_lfnst_mode ( const CodingUnit& cu, CUCtx& cuCtx ); void isp_mode ( const CodingUnit& cu ); void explicit_rdpcm_mode ( const TransformUnit& tu, ComponentID compID ); void last_sig_coeff ( CoeffCodingContext& cctx, const TransformUnit& tu, ComponentID compID ); void residual_coding_subblock ( CoeffCodingContext& cctx, const TCoeff* coeff, const int stateTransTable, int& state ); + void residual_codingTS ( const TransformUnit& tu, ComponentID compID ); + void residual_coding_subblockTS( CoeffCodingContext& cctx, const TCoeff* coeff ); + void joint_cb_cr ( const TransformUnit& tu, const int cbfMask ); // cross component prediction (clause 7.3.8.12) void cross_comp_pred ( const TransformUnit& tu, ComponentID compID ); - void codeAlfCtuEnableFlags ( CodingStructure& cs, ChannelType channel, AlfSliceParam* alfParam); - void codeAlfCtuEnableFlags ( CodingStructure& cs, ComponentID compID, AlfSliceParam* alfParam); - void codeAlfCtuEnableFlag ( CodingStructure& cs, uint32_t ctuRsAddr, const int 
compIdx, AlfSliceParam* alfParam = NULL ); + void codeAlfCtuEnableFlags ( CodingStructure& cs, ChannelType channel, AlfParam* alfParam); + void codeAlfCtuEnableFlags ( CodingStructure& cs, ComponentID compID, AlfParam* alfParam); + void codeAlfCtuEnableFlag ( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfParam* alfParam ); + void codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr, bool alfEnableLuma); + + void codeAlfCtuAlternatives ( CodingStructure& cs, ChannelType channel, AlfParam* alfParam); + void codeAlfCtuAlternatives ( CodingStructure& cs, ComponentID compID, AlfParam* alfParam); + void codeAlfCtuAlternative ( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, const AlfParam* alfParam = NULL ); private: void unary_max_symbol ( unsigned symbol, unsigned ctxId0, unsigned ctxIdN, unsigned maxSymbol ); void unary_max_eqprob ( unsigned symbol, unsigned maxSymbol ); void exp_golomb_eqprob ( unsigned symbol, unsigned count ); + void code_unary_fixed ( unsigned symbol, unsigned ctxId, unsigned unary_max, unsigned fixed ); // statistic unsigned get_num_written_bits() { return m_BinEncoder.getNumWrittenBits(); } void xWriteTruncBinCode(uint32_t uiSymbol, uint32_t uiMaxSymbol); - + void codeScanRotationModeFlag ( const CodingUnit& cu, ComponentID compBegin); + void xEncodePLTPredIndicator ( const CodingUnit& cu, uint32_t maxPltSize, ComponentID compBegin); private: BinEncIf& m_BinEncoder; OutputBitstream* m_Bitstream; Ctx m_TestCtx; EncCu* m_EncCu; + ScanElement* m_scanOrder; }; diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt index 89286b308f417c0aa6795129e38fa0612deba306..2a50346f2c45c4a57d7c73788df086a19645d7f0 100644 --- a/source/Lib/EncoderLib/CMakeLists.txt +++ b/source/Lib/EncoderLib/CMakeLists.txt @@ -20,6 +20,10 @@ if( EXTENSION_360_VIDEO ) target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 ) endif() +if( EXTENSION_HDRTOOLS ) + target_compile_definitions( 
${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 ) +endif() + if( SET_ENABLE_TRACING ) if( ENABLE_TRACING ) target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 ) diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp index 8259f17589ceca365198962e31c3c931e6ba0c66..6de3317a96841f45a070bbe482aac24164d9fed8 100644 --- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp +++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,11 +39,370 @@ #include "CommonLib/Picture.h" #include "CommonLib/CodingStructure.h" -#define AlfCtx(c) SubCtx( Ctx::ctbAlfFlag, c ) +#define AlfCtx(c) SubCtx( Ctx::Alf, c) std::vector<double> EncAdaptiveLoopFilter::m_lumaLevelToWeightPLUT; -EncAdaptiveLoopFilter::EncAdaptiveLoopFilter() +void AlfCovariance::getClipMax(const AlfFilterShape& alfShape, int *clip_max) const +{ + for( int k = 0; k < numCoeff-1; ++k ) + { + clip_max[k] = 0; + + bool inc = true; + while( inc && clip_max[k]+1 < numBins && y[clip_max[k]+1][k] == y[clip_max[k]][k] ) + { + for( int l = 0; inc && l < numCoeff; ++l ) + if( E[clip_max[k]][0][k][l] != E[clip_max[k]+1][0][k][l] ) + { + inc = false; + } + if( inc ) + { + ++clip_max[k]; + } + } + } + clip_max[numCoeff-1] = 0; +} + +void AlfCovariance::reduceClipCost(const AlfFilterShape& alfShape, int *clip) const +{ + for( int k = 0; k < numCoeff-1; ++k ) + { + bool dec = true; + while( dec && clip[k] > 0 && y[clip[k]-1][k] == y[clip[k]][k] ) + { + for( int l = 0; dec && l < numCoeff; ++l ) + if( E[clip[k]][clip[l]][k][l] != E[clip[k]-1][clip[l]][k][l] ) + { + dec = false; + } + if( dec ) + { + --clip[k]; + } + } + } +} + +double AlfCovariance::optimizeFilter(const 
AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const +{ + const int size = alfShape.numCoeff; + int clip_max[MAX_NUM_ALF_LUMA_COEFF]; + + double err_best, err_last; + + TE kE; + Ty ky; + + if( optimize_clip ) + { + // Start by looking for min clipping that has no impact => max_clipping + getClipMax(alfShape, clip_max); + for (int k=0; k<size; ++k) + { + clip[k] = std::max(clip_max[k], clip[k]); + clip[k] = std::min(clip[k], numBins-1); + } + } + + setEyFromClip( clip, kE, ky, size ); + + gnsSolveByChol( kE, ky, f, size ); + err_best = calculateError( clip, f, size ); + + int step = optimize_clip ? (numBins+1)/2 : 0; + + while( step > 0 ) + { + double err_min = err_best; + int idx_min = -1; + int inc_min = 0; + + for( int k = 0; k < size-1; ++k ) + { + if( clip[k] - step >= clip_max[k] ) + { + clip[k] -= step; + ky[k] = y[clip[k]][k]; + for( int l = 0; l < size; l++ ) + { + kE[k][l] = E[clip[k]][clip[l]][k][l]; + kE[l][k] = E[clip[l]][clip[k]][l][k]; + } + + gnsSolveByChol( kE, ky, f, size ); + err_last = calculateError( clip, f, size ); + + if( err_last < err_min ) + { + err_min = err_last; + idx_min = k; + inc_min = -step; + } + clip[k] += step; + } + if( clip[k] + step < numBins ) + { + clip[k] += step; + ky[k] = y[clip[k]][k]; + for( int l = 0; l < size; l++ ) + { + kE[k][l] = E[clip[k]][clip[l]][k][l]; + kE[l][k] = E[clip[l]][clip[k]][l][k]; + } + + gnsSolveByChol( kE, ky, f, size ); + err_last = calculateError( clip, f, size ); + + if( err_last < err_min ) + { + err_min = err_last; + idx_min = k; + inc_min = step; + } + clip[k] -= step; + + } + ky[k] = y[clip[k]][k]; + for( int l = 0; l < size; l++ ) + { + kE[k][l] = E[clip[k]][clip[l]][k][l]; + kE[l][k] = E[clip[l]][clip[k]][l][k]; + } + } + + if( idx_min >= 0 ) + { + err_best = err_min; + clip[idx_min] += inc_min; + ky[idx_min] = y[clip[idx_min]][idx_min]; + for( int l = 0; l < size; l++ ) + { + kE[idx_min][l] = E[clip[idx_min]][clip[l]][idx_min][l]; + kE[l][idx_min] = 
E[clip[l]][clip[idx_min]][l][idx_min]; + } + } + else + { + --step; + } + } + + if( optimize_clip ) { + // test all max + for( int k = 0; k < size-1; ++k ) + { + clip_max[k] = 0; + } + TE kE_max; + Ty ky_max; + setEyFromClip( clip_max, kE_max, ky_max, size ); + + gnsSolveByChol( kE_max, ky_max, f, size ); + err_last = calculateError( clip_max, f, size ); + if( err_last < err_best ) + { + err_best = err_last; + for (int k=0; k<size; ++k) + { + clip[k] = clip_max[k]; + } + } + else + { + // update clip to reduce coding cost + reduceClipCost(alfShape, clip); + + // update f with best solution + gnsSolveByChol( kE, ky, f, size ); + } + } + + return err_best; +} + +double AlfCovariance::calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const +{ + double factor = 1 << ( bitDepth - 1 ); + double error = 0; + + for( int i = 0; i < numCoeff; i++ ) //diagonal + { + double sum = 0; + for( int j = i + 1; j < numCoeff; j++ ) + { + // E[j][i] = E[i][j], sum will be multiplied by 2 later + sum += E[clip[i]][clip[j]][i][j] * coeff[j]; + } + error += ( ( E[clip[i]][clip[i]][i][i] * coeff[i] + sum * 2 ) / factor - 2 * y[clip[i]][i] ) * coeff[i]; + } + + return error / factor; +} + +double AlfCovariance::calculateError( const int *clip, const double *coeff, const int numCoeff ) const +{ + double sum = 0; + for( int i = 0; i < numCoeff; i++ ) + { + sum += coeff[i] * y[clip[i]][i]; + } + + return pixAcc - sum; +} + +double AlfCovariance::calculateError( const int *clip ) const +{ + Ty c; + + return optimizeFilter( clip, c, numCoeff ); +} +//******************************** +// Cholesky decomposition +//******************************** + +#define ROUND(a) (((a) < 0)? 
(int)((a) - 0.5) : (int)((a) + 0.5)) +#define REG 0.0001 +#define REG_SQR 0.0000001 + +//Find filter coeff related +int AlfCovariance::gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const +{ + Ty invDiag; /* Vector of the inverse of diagonal entries of outMatr */ + + for( int i = 0; i < numEq; i++ ) + { + for( int j = i; j < numEq; j++ ) + { + /* Compute the scaling factor */ + double scale = inpMatr[i][j]; + if( i > 0 ) + { + for( int k = i - 1; k >= 0; k-- ) + { + scale -= outMatr[k][j] * outMatr[k][i]; + } + } + + /* Compute i'th row of outMatr */ + if( i == j ) + { + if( scale <= REG_SQR ) // if(scale <= 0 ) /* If inpMatr is singular */ + { + return 0; + } + else /* Normal operation */ + invDiag[i] = 1.0 / ( outMatr[i][i] = sqrt( scale ) ); + } + else + { + outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part */ + outMatr[j][i] = 0.0; /* Lower triangular part set to 0 */ + } + } + } + return 1; /* Signal that Cholesky factorization is successfully performed */ +} + +void AlfCovariance::gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const +{ + /* Backsubstitution starts */ + x[0] = rhs[0] / U[0][0]; /* First row of U' */ + for( int i = 1; i < order; i++ ) + { /* For the rows 1..order-1 */ + + double sum = 0; //Holds backsubstitution from already handled rows + + for( int j = 0; j < i; j++ ) /* Backsubst already solved unknowns */ + { + sum += x[j] * U[j][i]; + } + + x[i] = ( rhs[i] - sum ) / U[i][i]; /* i'th component of solution vect. 
*/ + } +} + +void AlfCovariance::gnsBacksubstitution( TE R, double* z, int size, double* A ) const +{ + size--; + A[size] = z[size] / R[size][size]; + + for( int i = size - 1; i >= 0; i-- ) + { + double sum = 0; + + for( int j = i + 1; j <= size; j++ ) + { + sum += R[i][j] * A[j]; + } + + A[i] = ( z[i] - sum ) / R[i][i]; + } +} + +int AlfCovariance::gnsSolveByChol( const int *clip, double *x, int numEq ) const +{ + TE LHS; + Ty rhs; + + setEyFromClip( clip, LHS, rhs, numEq ); + return gnsSolveByChol( LHS, rhs, x, numEq ); +} + +int AlfCovariance::gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const +{ + Ty aux; /* Auxiliary vector */ + TE U; /* Upper triangular Cholesky factor of LHS */ + + int res = 1; // Signal that Cholesky factorization is successfully performed + + /* The equation to be solved is LHSx = rhs */ + + /* Compute upper triangular U such that U'*U = LHS */ + if( gnsCholeskyDec( LHS, U, numEq ) ) /* If Cholesky decomposition has been successful */ + { + /* Now, the equation is U'*U*x = rhs, where U is upper triangular + * Solve U'*aux = rhs for aux + */ + gnsTransposeBacksubstitution( U, rhs, aux, numEq ); + + /* The equation is now U*x = aux, solve it for x (new motion coefficients) */ + gnsBacksubstitution( U, aux, numEq, x ); + + } + else /* LHS was singular */ + { + res = 0; + + /* Regularize LHS */ + for( int i = 0; i < numEq; i++ ) + { + LHS[i][i] += REG; + } + + /* Compute upper triangular U such that U'*U = regularized LHS */ + res = gnsCholeskyDec( LHS, U, numEq ); + + if( !res ) + { + std::memset( x, 0, sizeof( double )*numEq ); + return 0; + } + + /* Solve U'*aux = rhs for aux */ + gnsTransposeBacksubstitution( U, rhs, aux, numEq ); + + /* Solve U*x = aux for x */ + gnsBacksubstitution( U, aux, numEq, x ); + } + return res; +} +////////////////////////////////////////////////////////////////////////////////////////// + +EncAdaptiveLoopFilter::EncAdaptiveLoopFilter( int& apsIdStart ) : m_CABACEstimator( nullptr ) + , 
m_apsIdStart( apsIdStart ) { for( int i = 0; i < MAX_NUM_COMPONENT; i++ ) { @@ -53,21 +412,23 @@ EncAdaptiveLoopFilter::EncAdaptiveLoopFilter() { m_alfCovarianceFrame[i] = nullptr; } - m_filterCoeffQuant = nullptr; m_filterCoeffSet = nullptr; + m_filterClippSet = nullptr; m_diffFilterCoeff = nullptr; m_alfWSSD = 0; } -void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ) +void EncAdaptiveLoopFilter::create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ) { AdaptiveLoopFilter::create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxCUDepth, inputBitDepth ); + CHECK( encCfg == nullptr, "encCfg must not be null" ); + m_encCfg = encCfg; for( int channelIdx = 0; channelIdx < MAX_NUM_CHANNEL_TYPE; channelIdx++ ) { ChannelType chType = (ChannelType)channelIdx; - int numClasses = channelIdx ? 1 : MAX_NUM_ALF_CLASSES; + int numClasses = channelIdx ? 
MAX_NUM_ALF_ALTERNATIVES_CHROMA : MAX_NUM_ALF_CLASSES; m_alfCovarianceFrame[chType] = new AlfCovariance*[m_filterShapes[chType].size()]; for( int i = 0; i != m_filterShapes[chType].size(); i++ ) { @@ -82,6 +443,16 @@ void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, con for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ ) { m_ctuEnableFlagTmp[compIdx] = new uint8_t[m_numCTUsInPic]; + m_ctuEnableFlagTmp2[compIdx] = new uint8_t[m_numCTUsInPic]; + if( isLuma( ComponentID(compIdx) ) ) + { + m_ctuAlternativeTmp[compIdx] = nullptr; + } + else + { + m_ctuAlternativeTmp[compIdx] = new uint8_t[m_numCTUsInPic]; + std::fill_n( m_ctuAlternativeTmp[compIdx], m_numCTUsInPic, 0 ); + } ChannelType chType = toChannelType( ComponentID( compIdx ) ); int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES; @@ -103,25 +474,39 @@ void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, con for( int i = 0; i != m_filterShapes[COMPONENT_Y].size(); i++ ) { - for( int j = 0; j <= MAX_NUM_ALF_CLASSES; j++ ) + for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++) { m_alfCovarianceMerged[i][j].create( m_filterShapes[COMPONENT_Y][i].numCoeff ); } } - m_filterCoeffQuant = new int[MAX_NUM_ALF_LUMA_COEFF]; - m_filterCoeffSet = new int*[MAX_NUM_ALF_CLASSES]; + m_filterCoeffSet = new int*[std::max(MAX_NUM_ALF_CLASSES, MAX_NUM_ALF_ALTERNATIVES_CHROMA)]; + m_filterClippSet = new int*[std::max(MAX_NUM_ALF_CLASSES, MAX_NUM_ALF_ALTERNATIVES_CHROMA)]; m_diffFilterCoeff = new int*[MAX_NUM_ALF_CLASSES]; for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ ) { m_filterCoeffSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF]; + m_filterClippSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF]; m_diffFilterCoeff[i] = new int[MAX_NUM_ALF_LUMA_COEFF]; } + + + m_ctbDistortionFixedFilter = new double[m_numCTUsInPic]; + for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) + { + m_ctbDistortionUnfilter[comp] = new double[m_numCTUsInPic]; + } + m_alfCtbFilterSetIndexTmp.resize(m_numCTUsInPic); + 
memset(m_clipDefaultEnc, 0, sizeof(m_clipDefaultEnc)); } void EncAdaptiveLoopFilter::destroy() { + if (!m_created) + { + return; + } for( int channelIdx = 0; channelIdx < MAX_NUM_CHANNEL_TYPE; channelIdx++ ) { if( m_alfCovarianceFrame[channelIdx] ) @@ -150,12 +535,24 @@ void EncAdaptiveLoopFilter::destroy() m_ctuEnableFlagTmp[compIdx] = nullptr; } - if( m_alfCovariance[compIdx] ) + if( m_ctuEnableFlagTmp2[compIdx] ) { - ChannelType chType = toChannelType( ComponentID( compIdx ) ); - int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES; + delete[] m_ctuEnableFlagTmp2[compIdx]; + m_ctuEnableFlagTmp2[compIdx] = nullptr; + } - for( int i = 0; i != m_filterShapes[chType].size(); i++ ) + if( m_ctuAlternativeTmp[compIdx] ) + { + delete[] m_ctuAlternativeTmp[compIdx]; + m_ctuAlternativeTmp[compIdx] = nullptr; + } + + if( m_alfCovariance[compIdx] ) + { + ChannelType chType = toChannelType( ComponentID( compIdx ) ); + int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES; + + for( int i = 0; i != m_filterShapes[chType].size(); i++ ) { for( int j = 0; j < m_numCTUsInPic; j++ ) { @@ -178,7 +575,7 @@ void EncAdaptiveLoopFilter::destroy() for( int i = 0; i != m_filterShapes[COMPONENT_Y].size(); i++ ) { - for( int j = 0; j <= MAX_NUM_ALF_CLASSES; j++ ) + for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++) { m_alfCovarianceMerged[i][j].destroy(); } @@ -195,6 +592,17 @@ void EncAdaptiveLoopFilter::destroy() m_filterCoeffSet = nullptr; } + if( m_filterClippSet ) + { + for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ ) + { + delete[] m_filterClippSet[i]; + m_filterClippSet[i] = nullptr; + } + delete[] m_filterClippSet; + m_filterClippSet = nullptr; + } + if( m_diffFilterCoeff ) { for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ ) @@ -206,28 +614,57 @@ void EncAdaptiveLoopFilter::destroy() m_diffFilterCoeff = nullptr; } - delete[] m_filterCoeffQuant; - m_filterCoeffQuant = nullptr; + delete[] m_ctbDistortionFixedFilter; + m_ctbDistortionFixedFilter = nullptr; + for (int comp = 0; comp < 
MAX_NUM_COMPONENT; comp++) + { + delete[] m_ctbDistortionUnfilter[comp]; + m_ctbDistortionUnfilter[comp] = nullptr; + } AdaptiveLoopFilter::destroy(); } - -void EncAdaptiveLoopFilter::initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice ) +void EncAdaptiveLoopFilter::initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice +, ParameterSetMap<APS>* apsMap ) { + m_apsMap = apsMap; m_CABACEstimator = cabacEncoder->getCABACEstimator( pcSlice->getSPS() ); m_CtxCache = ctxCache; m_CABACEstimator->initCtxModels( *pcSlice ); m_CABACEstimator->resetBits(); } -void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambdas, +void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambdas #if ENABLE_QPA - const double lambdaChromaWeight, + , const double lambdaChromaWeight #endif - AlfSliceParam& alfSliceParam ) + ) { + int layerIdx = cs.vps == nullptr ? 0 : cs.vps->getGeneralLayerIdx( cs.slice->getPic()->layerId ); + + // IRAP AU is assumed + if( !layerIdx && ( cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() ) ) + { + memset(cs.slice->getAlfAPSs(), 0, sizeof(*cs.slice->getAlfAPSs())*ALF_CTB_MAX_NUM_APS); + m_apsIdStart = ALF_CTB_MAX_NUM_APS; + + m_apsMap->clear(); + for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) + { + APS* alfAPS = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS); + m_apsMap->clearChangedFlag((i << NUM_APS_TYPE_LEN) + ALF_APS); + if (alfAPS) + { + alfAPS->getAlfAPSParam().reset(); + alfAPS = nullptr; + } + } + } + AlfParam alfParam; + alfParam.reset(); + const TempCtx ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx())); // set available filter shapes - alfSliceParam.filterShapes = m_filterShapes; + alfParam.filterShapes = m_filterShapes; // set clipping range m_clpRngs = cs.slice->getClpRngs(); @@ -236,10 +673,11 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; 
compIdx++ ) { m_ctuEnableFlag[compIdx] = cs.picture->getAlfCtuEnableFlag( compIdx ); + m_ctuAlternative[compIdx] = cs.picture->getAlfCtuAlternativeData( compIdx ); } // reset ALF parameters - alfSliceParam.reset(); + alfParam.reset(); int shiftLuma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_LUMA]); int shiftChroma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_CHROMA]); m_lambda[COMPONENT_Y] = lambdas[COMPONENT_Y] * double(1 << shiftLuma); @@ -254,30 +692,110 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd // derive classification const CPelBuf& recLuma = recYuv.get( COMPONENT_Y ); - Area blk( 0, 0, recLuma.width, recLuma.height ); - deriveClassification( m_classifier, recLuma, blk ); - Area blkPCM(0, 0, recLuma.width, recLuma.height); - resetPCMBlkClassInfo(cs, m_classifier, recLuma, blkPCM); + const PreCalcValues& pcv = *cs.pcv; + bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { 0, 0, 0 }; + int verVirBndryPos[] = { 0, 0, 0 }; + + for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight ) + { + for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth ) + { + const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth; + const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight; + int rasterSliceAlfPad = 0; + if (isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) ) + { + int yStart = yPos; + for( int i = 0; i <= numHorVirBndry; i++ ) + { + const int yEnd = i == numHorVirBndry ? 
yPos + height : horVirBndryPos[i]; + const int h = yEnd - yStart; + const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 ); + const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight ); + int xStart = xPos; + for( int j = 0; j <= numVerVirBndry; j++ ) + { + const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j]; + const int w = xEnd - xStart; + const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 ); + const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth ); + const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); + const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); + PelUnitBuf buf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) ); + buf.copyFrom( recYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) ); + // pad top-left unavailable samples for raster slice + if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 ); + } + + // pad bottom-right unavailable samples for raster slice + if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 ); + } + buf.extendBorderPel( MAX_ALF_PADDING_SIZE ); + buf = buf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 
0 : MAX_ALF_PADDING_SIZE, w, h ) ) ); + + const Area blkSrc( 0, 0, w, h ); + const Area blkDst( xStart, yStart, w, h ); + deriveClassification( m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc ); + + xStart = xEnd; + } + + yStart = yEnd; + } + } + else + { + Area blk( xPos, yPos, width, height ); + deriveClassification( m_classifier, recLuma, blk, blk ); + } + } + } // get CTB stats for filtering - deriveStatsForFiltering( orgYuv, recYuv ); + deriveStatsForFiltering( orgYuv, recYuv, cs ); + for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++) + { + cs.slice->getPic()->getAlfCtbFilterIndex()[ctbIdx] = NUM_FIXED_FILTER_SETS; + } + // consider using new filter (only) + alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = true; + alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = true; + cs.slice->setTileGroupNumAps(1); // Only new filter for RD cost optimization // derive filter (luma) - alfEncoder( cs, alfSliceParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_LUMA + alfEncoder( cs, alfParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_LUMA #if ENABLE_QPA , lambdaChromaWeight #endif ); // derive filter (chroma) - if( alfSliceParam.enabledFlag[COMPONENT_Y] ) { - alfEncoder( cs, alfSliceParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_CHROMA + alfEncoder( cs, alfParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_CHROMA #if ENABLE_QPA , lambdaChromaWeight #endif ); } + + // let alfEncoderCtb decide now + alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = false; + alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = false; + cs.slice->setTileGroupNumAps(0); + m_CABACEstimator->getCtx() = AlfCtx(ctxStart); + alfEncoderCtb(cs, alfParam +#if ENABLE_QPA + , lambdaChromaWeight +#endif + ); + + alfReconstructor(cs, recYuv); } double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, const int iShapeIdx, ChannelType channel, @@ -288,41 +806,97 @@ double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, cons { TempCtx ctxTempStart( m_CtxCache ); TempCtx 
ctxTempBest( m_CtxCache ); + TempCtx ctxTempAltStart( m_CtxCache ); + TempCtx ctxTempAltBest( m_CtxCache ); const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb; const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr; + const int numAlts = isLuma( channel ) ? 1 : m_alfParamTemp.numAlternativesChroma; double cost = 0; distUnfilter = 0; - setEnableFlag(m_alfSliceParamTemp, channel, true); + setEnableFlag(m_alfParamTemp, channel, true); #if ENABLE_QPA - CHECK ((chromaWeight > 0.0) && (cs.slice->getSliceCurStartCtuTsAddr() != 0), "incompatible start CTU address, must be 0"); + CHECK ((chromaWeight > 0.0) && (cs.slice->getFirstCtuRsAddrInSlice() != 0), "incompatible start CTU address, must be 0"); #endif + reconstructCoeff(m_alfParamTemp, channel, true, isLuma(channel)); + for( int altIdx = 0; altIdx < (isLuma(channel) ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA); altIdx++) + { + for (int classIdx = 0; classIdx < (isLuma(channel) ? MAX_NUM_ALF_CLASSES : 1); classIdx++) + { + for (int i = 0; i < (isLuma(channel) ? MAX_NUM_ALF_LUMA_COEFF : MAX_NUM_ALF_CHROMA_COEFF); i++) + { + m_filterCoeffSet[isLuma(channel) ? classIdx : altIdx][i] = isLuma(channel) ? m_coeffFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + i] : m_chromaCoeffFinal[altIdx][i]; + m_filterClippSet[isLuma(channel) ? classIdx : altIdx][i] = isLuma(channel) ? 
m_clippFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + i] : m_chromaClippFinal[altIdx][i]; + } + } + } + for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ ) { for( int compID = compIDFirst; compID <= compIDLast; compID++ ) { - double distUnfilterCtu = getUnfilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses ); - - ctxTempStart = AlfCtx( m_CABACEstimator->getCtx() ); - m_CABACEstimator->resetBits(); - m_ctuEnableFlag[compID][ctuIdx] = 1; - m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfSliceParamTemp ); - double costOn = distUnfilterCtu + getFilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses, m_alfSliceParamTemp.numLumaFilters - 1, numCoeff ); #if ENABLE_QPA const double ctuLambda = chromaWeight > 0.0 ? (isLuma (channel) ? cs.picture->m_uEnerHpCtu[ctuIdx] : cs.picture->m_uEnerHpCtu[ctuIdx] / chromaWeight) : m_lambda[compID]; #else const double ctuLambda = m_lambda[compID]; #endif - costOn += ctuLambda * FracBitsScale*(double)m_CABACEstimator->getEstFracBits(); + + double distUnfilterCtu = getUnfilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses ); + + ctxTempStart = AlfCtx( m_CABACEstimator->getCtx() ); + m_CABACEstimator->resetBits(); + m_ctuEnableFlag[compID][ctuIdx] = 1; + m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfParamTemp ); + if( isLuma( channel ) ) + { + // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation + assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS ); + assert( cs.slice->getTileGroupNumAps() == 1 ); + m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]); + } + double costOn = distUnfilterCtu + ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() ); + if( isLuma( channel ) ) + { + costOn += getFilteredDistortion( 
m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses, m_alfParamTemp.numLumaFilters - 1, numCoeff ); + } + else + { + double bestAltCost = MAX_DOUBLE; + int bestAltIdx = -1; + ctxTempAltStart = AlfCtx( ctxTempBest ); + for( int altIdx = 0; altIdx < numAlts; ++altIdx ) + { + if( altIdx ) + m_CABACEstimator->getCtx() = AlfCtx( ctxTempAltStart ); + m_CABACEstimator->resetBits(); + m_ctuAlternative[compID][ctuIdx] = altIdx; + m_CABACEstimator->codeAlfCtuAlternative( cs, ctuIdx, compID, &m_alfParamTemp ); + double r_altCost = ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + + double altDist = 0.; + altDist += m_alfCovariance[compID][iShapeIdx][ctuIdx][0].calcErrorForCoeffs( m_filterClippSet[altIdx], m_filterCoeffSet[altIdx], numCoeff, m_NUM_BITS ); + + double altCost = altDist + r_altCost; + if( altCost < bestAltCost ) + { + bestAltCost = altCost; + bestAltIdx = altIdx; + ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() ); + } + } + m_ctuAlternative[compID][ctuIdx] = bestAltIdx; + costOn += bestAltCost; + } m_CABACEstimator->getCtx() = AlfCtx( ctxTempStart ); m_CABACEstimator->resetBits(); m_ctuEnableFlag[compID][ctuIdx] = 0; - m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfSliceParamTemp); - double costOff = distUnfilterCtu + ctuLambda * FracBitsScale*(double)m_CABACEstimator->getEstFracBits(); + m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfParamTemp); + double costOff = distUnfilterCtu + ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); if( costOn < costOff ) { @@ -341,15 +915,13 @@ double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, cons if( isChroma( channel ) ) { - setEnableFlag(m_alfSliceParamTemp, channel, m_ctuEnableFlag); - const int alfChromaIdc = m_alfSliceParamTemp.enabledFlag[COMPONENT_Cb] * 2 + m_alfSliceParamTemp.enabledFlag[COMPONENT_Cr]; - cost += lengthTruncatedUnary(alfChromaIdc, 3) * m_lambda[channel]; + setEnableFlag(m_alfParamTemp, channel, 
m_ctuEnableFlag); } return cost; } -void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfSliceParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel +void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfParam& alfParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel #if ENABLE_QPA , const double lambdaChromaWeight // = 0.0 #endif @@ -360,134 +932,145 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS double costMin = MAX_DOUBLE; - std::vector<AlfFilterShape>& alfFilterShape = alfSliceParam.filterShapes[channel]; - const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb; - const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr; + std::vector<AlfFilterShape>& alfFilterShape = alfParam.filterShapes[channel]; + m_bitsNewFilter[channel] = 0; const int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1; int uiCoeffBits = 0; for( int iShapeIdx = 0; iShapeIdx < alfFilterShape.size(); iShapeIdx++ ) { - m_alfSliceParamTemp = alfSliceParam; + m_alfParamTemp = alfParam; //1. get unfiltered distortion + if( isChroma(channel) ) + m_alfParamTemp.numAlternativesChroma = 1; double cost = getUnfilteredDistortion( m_alfCovarianceFrame[channel][iShapeIdx], channel ); cost /= 1.001; // slight preference for unfiltered choice if( cost < costMin ) { costMin = cost; - setEnableFlag( alfSliceParam, channel, false ); + setEnableFlag( alfParam, channel, false ); // no CABAC signalling ctxBest = AlfCtx( ctxStart ); setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 0 ); + if( isChroma(channel) ) + setCtuAlternativeChroma( m_ctuAlternativeTmp, 0 ); } - //2. 
all CTUs are on - setEnableFlag( m_alfSliceParamTemp, channel, true ); - m_CABACEstimator->getCtx() = AlfCtx( ctxStart ); - setCtuEnableFlag( m_ctuEnableFlag, channel, 1 ); - cost = getFilterCoeffAndCost( cs, 0, channel, false, iShapeIdx, uiCoeffBits ); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + const int nonLinearFlagMax = + ( isLuma( channel ) ? m_encCfg->getUseNonLinearAlfLuma() : m_encCfg->getUseNonLinearAlfChroma()) // For Chroma non linear flag is check for each alternative filter + ? 2 : 1; +#else + const int nonLinearFlagMax = + ( isLuma( channel ) ? m_encCfg->getUseNonLinearAlfLuma() : 0 ) // For Chroma non linear flag is check for each alternative filter + ? 2 : 1; +#endif - if( cost < costMin ) + for( int nonLinearFlag = 0; nonLinearFlag < nonLinearFlagMax; nonLinearFlag++ ) { - costMin = cost; - copyAlfSliceParam( alfSliceParam, m_alfSliceParamTemp, channel ); - ctxBest = AlfCtx( m_CABACEstimator->getCtx() ); - setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 1 ); - } - - //3. CTU decision - double distUnfilter = 0; - const int iterNum = isLuma(channel) ? (2 * 4 + 1) : (2 * 2 + 1); - - for( int iter = 0; iter < iterNum; iter++ ) + for( int numAlternatives = isLuma( channel ) ? 1 : getMaxNumAlternativesChroma(); numAlternatives > 0; numAlternatives-- ) { - if ((iter & 0x01) == 0) - { - m_CABACEstimator->getCtx() = AlfCtx(ctxStart); - cost = m_lambda[channel] * uiCoeffBits; - cost += deriveCtbAlfEnableFlags(cs, iShapeIdx, channel, -#if ENABLE_QPA - lambdaChromaWeight, + if( isChroma( channel ) ) + m_alfParamTemp.numAlternativesChroma = numAlternatives; + //2. 
all CTUs are on + setEnableFlag( m_alfParamTemp, channel, true ); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + m_alfParamTemp.nonLinearFlag[channel] = nonLinearFlag; +#else + if (isLuma(channel)) + m_alfParamTemp.nonLinearFlag[channel][0] = nonLinearFlag; #endif - numClasses, alfFilterShape[iShapeIdx].numCoeff, distUnfilter); - if (cost < costMin) - { - costMin = cost; - ctxBest = AlfCtx(m_CABACEstimator->getCtx()); - copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, channel); - copyAlfSliceParam(alfSliceParam, m_alfSliceParamTemp, channel); - } - } - else + m_CABACEstimator->getCtx() = AlfCtx( ctxStart ); + setCtuEnableFlag( m_ctuEnableFlag, channel, 1 ); + // all alternatives are on + if( isChroma( channel ) ) + initCtuAlternativeChroma( m_ctuAlternative ); + cost = getFilterCoeffAndCost( cs, 0, channel, true, iShapeIdx, uiCoeffBits ); + + if( cost < costMin ) { - // unfiltered distortion is added due to some CTBs may not use filter - cost = getFilterCoeffAndCost(cs, distUnfilter, channel, true, iShapeIdx, uiCoeffBits); + m_bitsNewFilter[channel] = uiCoeffBits; + costMin = cost; + copyAlfParam( alfParam, m_alfParamTemp, channel ); + ctxBest = AlfCtx( m_CABACEstimator->getCtx() ); + setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 1 ); + if( isChroma(channel) ) + copyCtuAlternativeChroma( m_ctuAlternativeTmp, m_ctuAlternative ); } - }//for iter - }//for shapeIdx - m_CABACEstimator->getCtx() = AlfCtx( ctxBest ); - copyCtuEnableFlag( m_ctuEnableFlag, m_ctuEnableFlagTmp, channel ); - - //filtering - reconstructCoeff( alfSliceParam, channel, isLuma( channel ) ); - for( int compIdx = compIDFirst; compIdx <= compIDLast; compIdx++ ) - { - ComponentID compID = (ComponentID)compIdx; - if( alfSliceParam.enabledFlag[compID] ) - { - const PreCalcValues& pcv = *cs.pcv; - int ctuIdx = 0; - const int chromaScaleX = getComponentScaleX( compID, recBuf.chromaFormat ); - const int chromaScaleY = getComponentScaleY( compID, recBuf.chromaFormat ); - AlfFilterType filterType = isLuma( 
compID ) ? ALF_FILTER_7 : ALF_FILTER_5; - short* coeff = isLuma( compID ) ? m_coeffFinal : alfSliceParam.chromaCoeff; + //3. CTU decision + double distUnfilter = 0; + double prevItCost = MAX_DOUBLE; + const int iterNum = isLuma(channel) ? (2 * 4 + 1) : (2 * (2 + m_alfParamTemp.numAlternativesChroma - 1) + 1); - for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight ) + for( int iter = 0; iter < iterNum; iter++ ) { - for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth ) + if ((iter & 0x01) == 0) { - const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth; - const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight; - Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY ); - - if( m_ctuEnableFlag[compID][ctuIdx] ) + m_CABACEstimator->getCtx() = AlfCtx(ctxStart); + cost = m_lambda[channel] * uiCoeffBits; + cost += deriveCtbAlfEnableFlags(cs, iShapeIdx, channel, +#if ENABLE_QPA + lambdaChromaWeight, +#endif + numClasses, alfFilterShape[iShapeIdx].numCoeff, distUnfilter); + if (cost < costMin) { - if( filterType == ALF_FILTER_5 ) - { - m_filter5x5Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs ); - } - else if( filterType == ALF_FILTER_7 ) - { - m_filter7x7Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs ); - } - else - { - CHECK( 0, "Wrong ALF filter type" ); - } + m_bitsNewFilter[channel] = uiCoeffBits; + costMin = cost; + ctxBest = AlfCtx(m_CABACEstimator->getCtx()); + copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, channel); + if( isChroma(channel) ) + copyCtuAlternativeChroma( m_ctuAlternativeTmp, m_ctuAlternative ); + copyAlfParam(alfParam, m_alfParamTemp, channel); + } + else if ( cost >= prevItCost ) + { + // High probability that we have converged or we are diverging + break; } - ctuIdx++; + prevItCost = cost; } - 
} + else + { + // unfiltered distortion is added due to some CTBs may not use filter + // no need to reset CABAC here, since uiCoeffBits is not affected + /*cost = */getFilterCoeffAndCost( cs, distUnfilter, channel, true, iShapeIdx, uiCoeffBits ); + } + }//for iter + // Decrease number of alternatives and reset ctu params and filters } - } + }// for nonLineaFlag + }//for shapeIdx + m_CABACEstimator->getCtx() = AlfCtx( ctxBest ); + if( isChroma(channel) ) + copyCtuAlternativeChroma( m_ctuAlternative, m_ctuAlternativeTmp ); + copyCtuEnableFlag( m_ctuEnableFlag, m_ctuEnableFlagTmp, channel ); } -void EncAdaptiveLoopFilter::copyAlfSliceParam( AlfSliceParam& alfSliceParamDst, AlfSliceParam& alfSliceParamSrc, ChannelType channel ) +void EncAdaptiveLoopFilter::copyAlfParam( AlfParam& alfParamDst, AlfParam& alfParamSrc, ChannelType channel ) { if( isLuma( channel ) ) { - memcpy( &alfSliceParamDst, &alfSliceParamSrc, sizeof( AlfSliceParam ) ); + memcpy( &alfParamDst, &alfParamSrc, sizeof( AlfParam ) ); } else { - alfSliceParamDst.enabledFlag[COMPONENT_Cb] = alfSliceParamSrc.enabledFlag[COMPONENT_Cb]; - alfSliceParamDst.enabledFlag[COMPONENT_Cr] = alfSliceParamSrc.enabledFlag[COMPONENT_Cr]; - memcpy( alfSliceParamDst.chromaCoeff, alfSliceParamSrc.chromaCoeff, sizeof( alfSliceParamDst.chromaCoeff ) ); + alfParamDst.enabledFlag[COMPONENT_Cb] = alfParamSrc.enabledFlag[COMPONENT_Cb]; + alfParamDst.enabledFlag[COMPONENT_Cr] = alfParamSrc.enabledFlag[COMPONENT_Cr]; + alfParamDst.numAlternativesChroma = alfParamSrc.numAlternativesChroma; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA] = alfParamSrc.nonLinearFlag[CHANNEL_TYPE_CHROMA]; +#else + memcpy( alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA], alfParamSrc.nonLinearFlag[CHANNEL_TYPE_CHROMA], sizeof( alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA] ) ); +#endif + memcpy( alfParamDst.chromaCoeff, alfParamSrc.chromaCoeff, sizeof( alfParamDst.chromaCoeff ) ); + memcpy( alfParamDst.chromaClipp, 
alfParamSrc.chromaClipp, sizeof( alfParamDst.chromaClipp ) ); } } -double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits ) + +double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits, bool onlyFilterCost ) { //collect stat based on CTU decision if( bReCollectStat ) @@ -497,108 +1080,119 @@ double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double double dist = distUnfilter; uiCoeffBits = 0; - int uiSliceFlag = 0; - AlfFilterShape& alfFilterShape = m_alfSliceParamTemp.filterShapes[channel][iShapeIdx]; + AlfFilterShape& alfFilterShape = m_alfParamTemp.filterShapes[channel][iShapeIdx]; //get filter coeff if( isLuma( channel ) ) { +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfParamTemp.nonLinearFlag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0); +#else + std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfParamTemp.nonLinearFlag[channel][0] ? 
AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0); +#endif + // Reset Merge Tmp Cov + m_alfCovarianceMerged[iShapeIdx][MAX_NUM_ALF_CLASSES].reset(AlfNumClippingValues[channel]); + m_alfCovarianceMerged[iShapeIdx][MAX_NUM_ALF_CLASSES + 1].reset(AlfNumClippingValues[channel]); //distortion - dist += mergeFiltersAndCost( m_alfSliceParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], uiCoeffBits ); + dist += mergeFiltersAndCost( m_alfParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], m_alfClipMerged[iShapeIdx], uiCoeffBits ); } else { //distortion - dist += m_alfCovarianceFrame[channel][iShapeIdx][0].pixAcc + deriveCoeffQuant( m_filterCoeffQuant, m_alfCovarianceFrame[channel][iShapeIdx][0].E, m_alfCovarianceFrame[channel][iShapeIdx][0].y, alfFilterShape.numCoeff, alfFilterShape.weights, m_NUM_BITS, true ); - memcpy( m_filterCoeffSet[0], m_filterCoeffQuant, sizeof( *m_filterCoeffQuant ) * alfFilterShape.numCoeff ); - //setEnableFlag( m_alfSliceParamTemp, channel, m_ctuEnableFlag ); - const int alfChromaIdc = m_alfSliceParamTemp.enabledFlag[COMPONENT_Cb] * 2 + m_alfSliceParamTemp.enabledFlag[COMPONENT_Cr]; - for( int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++ ) - { - m_alfSliceParamTemp.chromaCoeff[i] = m_filterCoeffQuant[i]; - } - uiCoeffBits += getCoeffRate( m_alfSliceParamTemp, true ); - uiSliceFlag = lengthTruncatedUnary(alfChromaIdc, 3); - } - - double rate = uiCoeffBits + uiSliceFlag; - m_CABACEstimator->resetBits(); - m_CABACEstimator->codeAlfCtuEnableFlags( cs, channel, &m_alfSliceParamTemp); - rate += FracBitsScale * (double)m_CABACEstimator->getEstFracBits(); - return dist + m_lambda[channel] * rate; -} - -int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma ) -{ - int iBits = 0; - if( !isChroma ) - { - iBits++; // alf_coefficients_delta_flag - if( !alfSliceParam.alfLumaCoeffDeltaFlag ) + for( int altIdx = 0; altIdx < 
m_alfParamTemp.numAlternativesChroma; ++altIdx ) { - if( alfSliceParam.numLumaFilters > 1 ) - { - iBits++; // coeff_delta_pred_mode_flag - } - } - } + assert(alfFilterShape.numCoeff == m_alfCovarianceFrame[channel][iShapeIdx][altIdx].numCoeff); + AlfParam bestSliceParam; + double bestCost = MAX_DOUBLE; + double bestDist = MAX_DOUBLE; + int bestCoeffBits = 0; + const int nonLinearFlagMax = m_encCfg->getUseNonLinearAlfChroma() ? 2 : 1; - memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) ); - AlfFilterShape alfShape( isChroma ? 5 : 7 ); - const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType ); - const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff; - const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters; - - // vlc for all - for( int ind = 0; ind < numFilters; ++ind ) - { - if( isChroma || !alfSliceParam.alfLumaCoeffDeltaFlag || alfSliceParam.alfLumaCoeffFlag[ind] ) - { - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + for( int nonLinearFlag = 0; nonLinearFlag < nonLinearFlagMax; nonLinearFlag++ ) { - int coeffVal = abs( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] ); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + int currentNonLinearFlag = m_alfParamTemp.nonLinearFlag[channel] ? 1 : 0; + if (nonLinearFlag != currentNonLinearFlag) + { + continue; + } +#else + m_alfParamTemp.nonLinearFlag[channel][altIdx] = nonLinearFlag; +#endif - for( int k = 1; k < 15; k++ ) + std::fill_n(m_filterClippSet[altIdx], MAX_NUM_ALF_CHROMA_COEFF, nonLinearFlag ? 
AlfNumClippingValues[CHANNEL_TYPE_CHROMA] / 2 : 0 ); + double dist = m_alfCovarianceFrame[channel][iShapeIdx][altIdx].pixAcc + deriveCoeffQuant( m_filterClippSet[altIdx], m_filterCoeffSet[altIdx], m_alfCovarianceFrame[channel][iShapeIdx][altIdx], alfFilterShape, m_NUM_BITS, nonLinearFlag ); + for( int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++ ) + { + m_alfParamTemp.chromaCoeff[altIdx][i] = m_filterCoeffSet[altIdx][i]; + m_alfParamTemp.chromaClipp[altIdx][i] = m_filterClippSet[altIdx][i]; + } + int coeffBits = getChromaCoeffRate( m_alfParamTemp, altIdx ); + double cost = dist + m_lambda[channel] * coeffBits; + if( cost < bestCost ) { - m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k ); + bestCost = cost; + bestDist = dist; + bestCoeffBits = coeffBits; + bestSliceParam = m_alfParamTemp; } } + uiCoeffBits += bestCoeffBits; + dist += bestDist; + m_alfParamTemp = bestSliceParam; } + uiCoeffBits += lengthUvlc( m_alfParamTemp.numAlternativesChroma-1 ); +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + uiCoeffBits++; +#else + uiCoeffBits += m_alfParamTemp.numAlternativesChroma; // non-linear flags +#endif } - - int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan ); - - // Golomb parameters - iBits += lengthUvlc( kMin - 1 ); // "min_golomb_order" - int golombOrderIncreaseFlag = 0; - - for( int idx = 0; idx < maxGolombIdx; idx++ ) + if (onlyFilterCost) { - golombOrderIncreaseFlag = ( m_kMinTab[idx] != kMin ) ? 
1 : 0; - CHECK( !( m_kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" ); - iBits += golombOrderIncreaseFlag; //golomb_order_increase_flag - kMin = m_kMinTab[idx]; + return dist + m_lambda[channel] * uiCoeffBits; } - - if( !isChroma ) + double rate = uiCoeffBits; + m_CABACEstimator->resetBits(); + m_CABACEstimator->codeAlfCtuEnableFlags( cs, channel, &m_alfParamTemp); + for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ ) { - if( alfSliceParam.alfLumaCoeffDeltaFlag ) + if( isLuma( channel ) ) { - iBits += numFilters; //filter_coefficient_flag[i] + // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation + assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS ); + assert( cs.slice->getTileGroupNumAps() == 1 ); + m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]); } } + m_CABACEstimator->codeAlfCtuAlternatives( cs, channel, &m_alfParamTemp ); + rate += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + return dist + m_lambda[channel] * rate; +} + +int EncAdaptiveLoopFilter::getChromaCoeffRate( AlfParam& alfParam, int altIdx ) +{ + int iBits = 0; + AlfFilterShape alfShape(5); // Filter coefficients - for( int ind = 0; ind < numFilters; ++ind ) + for( int i = 0; i < alfShape.numCoeff - 1; i++ ) { - if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag ) - { - continue; - } - - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + iBits += lengthGolomb( alfParam.chromaCoeff[altIdx][i], 3 ); // alf_coeff_chroma[altIdx][i] + } +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_CHROMA] ) +#else + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] ) +#endif + { + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - iBits += lengthGolomb( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], m_kMinTab[alfShape.golombIdx[i]] ); // alf_coeff_chroma[i], 
alf_coeff_luma_delta[i][j] + if( !abs( alfParam.chromaCoeff[altIdx][i] ) ) + { + alfParam.chromaClipp[altIdx][i] = 0; + } } + iBits += ((alfShape.numCoeff - 1) << 1); } return iBits; } @@ -612,7 +1206,7 @@ double EncAdaptiveLoopFilter::getUnfilteredDistortion( AlfCovariance* cov, Chann } else { - dist = getUnfilteredDistortion( cov, 1 ) + lengthTruncatedUnary( 0, 3 ) * m_lambda[COMPONENT_Cb]; + dist = getUnfilteredDistortion( cov, 1 ); } return dist; } @@ -633,14 +1227,13 @@ double EncAdaptiveLoopFilter::getFilteredDistortion( AlfCovariance* cov, const i for( int classIdx = 0; classIdx < numClasses; classIdx++ ) { - int filterIdx = numClasses == 1 ? 0 : m_filterIndices[numFiltersMinus1][classIdx]; - dist += calcErrorForCoeffs( cov[classIdx].E, cov[classIdx].y, m_filterCoeffSet[filterIdx], numCoeff, m_NUM_BITS ); + dist += cov[classIdx].calcErrorForCoeffs(m_filterClippSet[classIdx], m_filterCoeffSet[classIdx], numCoeff, m_NUM_BITS); } return dist; } -double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits ) +double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfParam& alfParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits ) { int numFiltersBest = 0; int numFilters = MAX_NUM_ALF_CLASSES; @@ -648,16 +1241,16 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, static double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2]; double cost, cost0, dist, distForce0, costMin = MAX_DOUBLE; - int predMode = 0, bestPredMode = 0, coeffBits, coeffBitsForce0; + int coeffBits, coeffBitsForce0; - mergeClasses( covFrame, covMerged, MAX_NUM_ALF_CLASSES, m_filterIndices ); + mergeClasses( alfShape, covFrame, covMerged, clipMerged, MAX_NUM_ALF_CLASSES, m_filterIndices ); while( numFilters >= 1 ) { - dist = 
deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab ); + dist = deriveFilterCoeffs(covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab, alfParam); // filter coeffs are stored in m_filterCoeffSet distForce0 = getDistForce0( alfShape, numFilters, errorForce0CoeffTab, codedVarBins ); - coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFilters, predMode ); + coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFilters ); coeffBitsForce0 = getCostFilterCoeffForce0( alfShape, m_filterCoeffSet, numFilters, codedVarBins ); cost = dist + m_lambda[COMPONENT_Y] * coeffBits; @@ -672,177 +1265,111 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, { costMin = cost; numFiltersBest = numFilters; - bestPredMode = predMode; } numFilters--; } - dist = deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab ); - coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFiltersBest, predMode ); + dist = deriveFilterCoeffs( covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab, alfParam ); + coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFiltersBest ); distForce0 = getDistForce0( alfShape, numFiltersBest, errorForce0CoeffTab, codedVarBins ); coeffBitsForce0 = getCostFilterCoeffForce0( alfShape, m_filterCoeffSet, numFiltersBest, codedVarBins ); cost = dist + m_lambda[COMPONENT_Y] * coeffBits; cost0 = distForce0 + m_lambda[COMPONENT_Y] * coeffBitsForce0; - alfSliceParam.numLumaFilters = numFiltersBest; + alfParam.numLumaFilters = numFiltersBest; double distReturn; if (cost <= cost0) { distReturn = dist; - 
alfSliceParam.alfLumaCoeffDeltaFlag = 0; + alfParam.alfLumaCoeffDeltaFlag = 0; uiCoeffBits = coeffBits; - alfSliceParam.alfLumaCoeffDeltaPredictionFlag = bestPredMode; } else { distReturn = distForce0; - alfSliceParam.alfLumaCoeffDeltaFlag = 1; + alfParam.alfLumaCoeffDeltaFlag = 1; uiCoeffBits = coeffBitsForce0; - memcpy( alfSliceParam.alfLumaCoeffFlag, codedVarBins, sizeof( codedVarBins ) ); - alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0; + memcpy( alfParam.alfLumaCoeffFlag, codedVarBins, sizeof( codedVarBins ) ); for( int varInd = 0; varInd < numFiltersBest; varInd++ ) { if( codedVarBins[varInd] == 0 ) { memset( m_filterCoeffSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF ); + memset( m_filterClippSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF ); } } } - for( int ind = 0; ind < alfSliceParam.numLumaFilters; ++ind ) + for( int ind = 0; ind < alfParam.numLumaFilters; ++ind ) { for( int i = 0; i < alfShape.numCoeff; i++ ) { - if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag ) - { - alfSliceParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_diffFilterCoeff[ind][i]; - } - else - { - alfSliceParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterCoeffSet[ind][i]; - } + alfParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterCoeffSet[ind][i]; + alfParam.lumaClipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterClippSet[ind][i]; } } - memcpy( alfSliceParam.filterCoeffDeltaIdx, m_filterIndices[numFiltersBest - 1], sizeof( short ) * MAX_NUM_ALF_CLASSES ); - uiCoeffBits += getNonFilterCoeffRate( alfSliceParam ); + memcpy( alfParam.filterCoeffDeltaIdx, m_filterIndices[numFiltersBest - 1], sizeof( short ) * MAX_NUM_ALF_CLASSES ); + uiCoeffBits += getNonFilterCoeffRate( alfParam ); return distReturn; } -int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfSliceParam& alfSliceParam ) +int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfParam& alfParam ) { - int len = 1 // alf_coefficients_delta_flag - + lengthTruncatedUnary( 0, 3 ) // chroma_idc = 
0, it is signalled when ALF is enabled for luma - + getTBlength( alfSliceParam.numLumaFilters - 1, MAX_NUM_ALF_CLASSES ); //numLumaFilters + int len = 0 // alf_coefficients_delta_flag + + 2 // slice_alf_chroma_idc u(2) + + lengthUvlc (alfParam.numLumaFilters - 1); // alf_luma_num_filters_signalled_minus1 ue(v) - if( alfSliceParam.numLumaFilters > 1 ) + if( alfParam.numLumaFilters > 1 ) { + const int coeffLength = ceilLog2(alfParam.numLumaFilters); for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ ) { - len += getTBlength( (int)alfSliceParam.filterCoeffDeltaIdx[i], alfSliceParam.numLumaFilters ); //filter_coeff_delta[i] + len += coeffLength; // alf_luma_coeff_delta_idx u(v) } } return len; } -int EncAdaptiveLoopFilter::lengthTruncatedUnary( int symbol, int maxSymbol ) -{ - if( maxSymbol == 0 ) - { - return 0; - } - - bool codeLast = ( maxSymbol > symbol ); - int bins = 0; - int numBins = 0; - while( symbol-- ) - { - bins <<= 1; - bins++; - numBins++; - } - if( codeLast ) - { - bins <<= 1; - numBins++; - } - - return numBins; -} - -int EncAdaptiveLoopFilter::getTBlength( int uiSymbol, const int uiMaxSymbol ) -{ - int uiThresh; - if( uiMaxSymbol > 256 ) - { - int uiThreshVal = 1 << 8; - uiThresh = 8; - while( uiThreshVal <= uiMaxSymbol ) - { - uiThresh++; - uiThreshVal <<= 1; - } - uiThresh--; - } - else - { - uiThresh = g_tbMax[uiMaxSymbol]; - } - - int uiVal = 1 << uiThresh; - assert( uiVal <= uiMaxSymbol ); - assert( ( uiVal << 1 ) > uiMaxSymbol ); - assert( uiSymbol < uiMaxSymbol ); - int b = uiMaxSymbol - uiVal; - assert( b < uiVal ); - if( uiSymbol < uiVal - b ) - { - return uiThresh; - } - else - { - return uiThresh + 1; - } -} int EncAdaptiveLoopFilter::getCostFilterCoeffForce0( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters, bool* codedVarBins ) { - const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType ); - memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) ); - + int len = 0; + // Filter coefficients for( int ind = 0; ind 
< numFilters; ++ind ) { - if( !codedVarBins[ind] ) + if( codedVarBins[ind] ) { - continue; + for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + { + len += lengthGolomb( abs( pDiffQFilterCoeffIntPP[ind][i] ), 3 ); // alf_coeff_luma_delta[i][j] + } } - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + else { - int coeffVal = abs( pDiffQFilterCoeffIntPP[ind][i] ); - for( int k = 1; k < 15; k++ ) + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k ); + len += lengthGolomb(0, 3); // alf_coeff_luma_delta[i][j] } } } - - int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan ); - - // Coding parameters - int len = kMin //min_golomb_order - + maxGolombIdx //golomb_order_increase_flag - + numFilters; //filter_coefficient_flag[i] - - // Filter coefficients - for( int ind = 0; ind < numFilters; ++ind ) +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ) +#else + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ) +#endif { - if( codedVarBins[ind] ) + for (int ind = 0; ind < numFilters; ++ind) { - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - len += lengthGolomb( abs( pDiffQFilterCoeffIntPP[ind][i] ), m_kMinTab[alfShape.golombIdx[i]] ); // alf_coeff_luma_delta[i][j] + if (!abs(pDiffQFilterCoeffIntPP[ind][i])) + { + m_filterClippSet[ind][i] = 0; + } + len += 2; } } } @@ -850,168 +1377,94 @@ int EncAdaptiveLoopFilter::getCostFilterCoeffForce0( AlfFilterShape& alfShape, i return len; } -int EncAdaptiveLoopFilter::deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters, int& predMode ) +int EncAdaptiveLoopFilter::deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters ) +{ + return (m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? 
getCostFilterClipp(alfShape, filterSet, numFilters) : 0) + getCostFilterCoeff(alfShape, filterSet, numFilters); +} + +int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters ) { - int ratePredMode0 = getCostFilterCoeff( alfShape, filterSet, numFilters ); + return lengthFilterCoeffs( alfShape, numFilters, pDiffQFilterCoeffIntPP ); // alf_coeff_luma_delta[i][j]; +} - for( int ind = 0; ind < numFilters; ++ind ) +int EncAdaptiveLoopFilter::getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters ) +{ + for (int filterIdx = 0; filterIdx < numFilters; ++filterIdx) { - if( ind == 0 ) - { - memcpy( filterCoeffDiff[ind], filterSet[ind], sizeof( int ) * alfShape.numCoeff ); - } - else + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - for( int i = 0; i < alfShape.numCoeff; i++ ) + if (!abs(pDiffQFilterCoeffIntPP[filterIdx][i])) { - filterCoeffDiff[ind][i] = filterSet[ind][i] - filterSet[ind - 1][i]; + m_filterClippSet[filterIdx][i] = 0; } } } - - int ratePredMode1 = getCostFilterCoeff( alfShape, filterCoeffDiff, numFilters ); - - predMode = ( ratePredMode1 < ratePredMode0 && numFilters > 1 ) ? 1 : 0; - - return ( numFilters > 1 ? 1 : 0 ) // coeff_delta_pred_mode_flag - + ( predMode ? 
ratePredMode1 : ratePredMode0 ); // min_golomb_order, golomb_order_increase_flag, alf_coeff_luma_delta + return (numFilters * (alfShape.numCoeff - 1)) << 1; } -int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters ) +int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff ) { - const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType ); - - memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) ); + int bitCnt = 0; for( int ind = 0; ind < numFilters; ++ind ) { for( int i = 0; i < alfShape.numCoeff - 1; i++ ) { - int coeffVal = abs( pDiffQFilterCoeffIntPP[ind][i] ); - for( int k = 1; k < 15; k++ ) - { - m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k ); - } - } - } - - int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan ); - - // Coding parameters - int len = kMin //min_golomb_order - + maxGolombIdx; //golomb_order_increase_flag - - // Filter coefficients - len += lengthFilterCoeffs( alfShape, numFilters, pDiffQFilterCoeffIntPP, m_kMinTab ); // alf_coeff_luma_delta[i][j] - - return len; -} - -int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab ) -{ - int bitCnt = 0; - - for( int ind = 0; ind < numFilters; ++ind ) - { - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) - { - bitCnt += lengthGolomb( abs( FilterCoeff[ind][i] ), kMinTab[alfShape.golombIdx[i]] ); + bitCnt += lengthGolomb( abs( FilterCoeff[ind][i] ), 3 ); } } return bitCnt; } + double EncAdaptiveLoopFilter::getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins ) { static int bitsVarBin[MAX_NUM_ALF_CLASSES]; - memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) ); for( int ind = 0; ind < numFilters; ++ind ) { + bitsVarBin[ind] = 0; for( int i = 0; i < alfShape.numCoeff - 1; i++ ) 
{ - int coeffVal = abs( m_filterCoeffSet[ind][i] ); - for( int k = 1; k < 15; k++ ) - { - m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k ); - } + bitsVarBin[ind] += lengthGolomb( abs( m_filterCoeffSet[ind][i] ), 3 ); } } - getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan ); - - for( int ind = 0; ind < numFilters; ++ind ) + static int zeroBitsVarBin = 0; + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - bitsVarBin[ind] = 0; - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) - { - bitsVarBin[ind] += lengthGolomb( abs( m_filterCoeffSet[ind][i] ), m_kMinTab[alfShape.golombIdx[i]] ); - } + zeroBitsVarBin += lengthGolomb(0, 3); } - - double distForce0 = getDistCoeffForce0( codedVarBins, errorTabForce0Coeff, bitsVarBin, numFilters ); - - return distForce0; -} - -int EncAdaptiveLoopFilter::getGolombKMin( AlfFilterShape& alfShape, const int numFilters, int kMinTab[MAX_NUM_ALF_LUMA_COEFF], int bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB] ) -{ - int kStart; - const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType ); - - int minBitsKStart = MAX_INT; - int minKStart = -1; - - for( int k = 1; k < 8; k++ ) +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ) +#else + if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ) +#endif { - int bitsKStart = 0; kStart = k; - for( int scanPos = 0; scanPos < maxGolombIdx; scanPos++ ) + for (int ind = 0; ind < numFilters; ++ind) { - int kMin = kStart; - int minBits = bitsCoeffScan[scanPos][kMin]; - - if( bitsCoeffScan[scanPos][kStart + 1] < minBits ) + for (int i = 0; i < alfShape.numCoeff - 1; i++) { - kMin = kStart + 1; - minBits = bitsCoeffScan[scanPos][kMin]; + if (!abs(m_filterCoeffSet[ind][i])) + { + m_filterClippSet[ind][i] = 0; + } } - kStart = kMin; - bitsKStart += minBits; - } - if( bitsKStart < minBitsKStart ) - { - minBitsKStart = bitsKStart; - minKStart = k; } } - kStart = minKStart; - for( int scanPos = 0; scanPos < maxGolombIdx; 
scanPos++ ) - { - int kMin = kStart; - int minBits = bitsCoeffScan[scanPos][kMin]; - - if( bitsCoeffScan[scanPos][kStart + 1] < minBits ) - { - kMin = kStart + 1; - minBits = bitsCoeffScan[scanPos][kMin]; - } - - kMinTab[scanPos] = kMin; - kStart = kMin; - } + double distForce0 = getDistCoeffForce0( codedVarBins, errorTabForce0Coeff, bitsVarBin, zeroBitsVarBin, numFilters); - return minKStart; + return distForce0; } - -double EncAdaptiveLoopFilter::getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, const int numFilters ) +double EncAdaptiveLoopFilter::getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, int zeroBitsVarBin, const int numFilters) { double distForce0 = 0; std::memset( codedVarBins, 0, sizeof( *codedVarBins ) * MAX_NUM_ALF_CLASSES ); for( int filtIdx = 0; filtIdx < numFilters; filtIdx++ ) { - double costDiff = errorForce0CoeffTab[filtIdx][0] - ( errorForce0CoeffTab[filtIdx][1] + m_lambda[COMPONENT_Y] * bitsVarBin[filtIdx] ); + double costDiff = (errorForce0CoeffTab[filtIdx][0] + m_lambda[COMPONENT_Y] * zeroBitsVarBin) - (errorForce0CoeffTab[filtIdx][1] + m_lambda[COMPONENT_Y] * bitsVarBin[filtIdx]); codedVarBins[filtIdx] = costDiff > 0 ? true : false; distForce0 += errorForce0CoeffTab[filtIdx][codedVarBins[filtIdx] ? 
1 : 0]; } @@ -1035,209 +1488,111 @@ int EncAdaptiveLoopFilter::lengthUvlc( int uiCode ) return ( uiLength >> 1 ) + ( ( uiLength + 1 ) >> 1 ); } -int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k ) +int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k, bool signed_coeff ) { - int m = 2 << ( k - 1 ); - int q = coeffVal / m; - if( coeffVal != 0 ) + int numBins = 0; + unsigned int symbol = abs(coeffVal); + while (symbol >= (unsigned int)(1 << k)) { - return q + 2 + k; + numBins++; + symbol -= 1 << k; + k++; } - else + numBins += ( k + 1) ; + if (signed_coeff && coeffVal != 0) { - return q + 1 + k; + numBins++; } + return numBins; } -double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] ) +double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], AlfParam& alfParam ) { double error = 0.0; AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES]; + + + for( int filtIdx = 0; filtIdx < numFilters; filtIdx++ ) { tmpCov.reset(); + bool found_clip = false; for( int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++ ) { if( filterIndices[classIdx] == filtIdx ) { tmpCov += cov[classIdx]; + if( !found_clip ) + { + found_clip = true; // clip should be at the adress of shortest one + memcpy(m_filterClippSet[filtIdx], clipMerged[numFilters-1][classIdx], sizeof(int[MAX_NUM_ALF_LUMA_COEFF])); + } } } // Find coeffcients - errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + deriveCoeffQuant( m_filterCoeffQuant, tmpCov.E, tmpCov.y, alfShape.numCoeff, alfShape.weights, m_NUM_BITS ); + assert(alfShape.numCoeff == tmpCov.numCoeff); + errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + 
deriveCoeffQuant( m_filterClippSet[filtIdx], m_filterCoeffSet[filtIdx], tmpCov, alfShape, m_NUM_BITS, false ); errorTabForce0Coeff[filtIdx][0] = tmpCov.pixAcc; error += errorTabForce0Coeff[filtIdx][1]; - - // store coeff - memcpy( m_filterCoeffSet[filtIdx], m_filterCoeffQuant, sizeof( int )*alfShape.numCoeff ); } return error; } -double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool bChroma ) +double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip ) { const int factor = 1 << ( bitDepth - 1 ); - static int filterCoeffQuantMod[MAX_NUM_ALF_LUMA_COEFF]; + const int max_value = factor - 1; + const int min_value = -factor + 1; + +const int numCoeff = shape.numCoeff; static double filterCoeff[MAX_NUM_ALF_LUMA_COEFF]; - gnsSolveByChol( E, y, filterCoeff, numCoeff ); + cov.optimizeFilter( shape, filterClipp, filterCoeff, optimizeClip ); roundFiltCoeff( filterCoeffQuant, filterCoeff, numCoeff, factor ); - const int targetCoeffSumInt = 0; - int quantCoeffSum = 0; - for( int i = 0; i < numCoeff; i++ ) - { - quantCoeffSum += weights[i] * filterCoeffQuant[i]; - } - - int count = 0; - while( quantCoeffSum != targetCoeffSumInt && count < 10 ) - { - int sign = quantCoeffSum > targetCoeffSumInt ? 
1 : -1; - int diff = ( quantCoeffSum - targetCoeffSumInt ) * sign; - - double errMin = MAX_DOUBLE; - int minInd = -1; - - for( int k = 0; k < numCoeff; k++ ) - { - if( weights[k] <= diff ) - { - memcpy( filterCoeffQuantMod, filterCoeffQuant, sizeof( int ) * numCoeff ); - - filterCoeffQuantMod[k] -= sign; - double error = calcErrorForCoeffs( E, y, filterCoeffQuantMod, numCoeff, bitDepth ); - - if( error < errMin ) - { - errMin = error; - minInd = k; - } - } - } - - if( minInd != -1 ) - { - filterCoeffQuant[minInd] -= sign; - } - - quantCoeffSum = 0; - for( int i = 0; i < numCoeff; i++ ) - { - quantCoeffSum += weights[i] * filterCoeffQuant[i]; - } - ++count; - } - if( count == 10 ) - { - memset( filterCoeffQuant, 0, sizeof( int ) * numCoeff ); - } - - int max_value = factor - 1; - int min_value = -factor; for ( int i = 0; i < numCoeff - 1; i++ ) { filterCoeffQuant[i] = std::min( max_value, std::max( min_value, filterCoeffQuant[i] ) ); - filterCoeff[i] = filterCoeffQuant[i] / double( factor ); - } - - quantCoeffSum = 0; - for ( int i = 0; i < numCoeff - 1; i++ ) - { - quantCoeffSum += weights[i] * filterCoeffQuant[i]; - filterCoeff[i] = filterCoeffQuant[i] / double(factor); } - filterCoeffQuant[numCoeff - 1] = -quantCoeffSum; - filterCoeff[numCoeff - 1] = filterCoeffQuant[numCoeff - 1] / double(factor); - + filterCoeffQuant[numCoeff - 1] = 0; - //Restrict the range of the center coefficient - int max_value_center = (2 * factor - 1) - factor; - int min_value_center = 0 - factor; + int modified=1; - filterCoeffQuant[numCoeff - 1] = std::min(max_value_center, std::max(min_value_center, filterCoeffQuant[numCoeff - 1])); - filterCoeff[numCoeff - 1] = filterCoeffQuant[numCoeff - 1] / double(factor); - - int coeffQuantAdjust[MAX_NUM_ALF_LUMA_COEFF]; - int adjustedTotalCoeff = (numCoeff - 1) << 1; - - count = 0; - quantCoeffSum += filterCoeffQuant[numCoeff - 1]; - while (quantCoeffSum != targetCoeffSumInt && count < 15) + double errRef=cov.calcErrorForCoeffs( filterClipp, 
filterCoeffQuant, numCoeff, bitDepth ); + while( modified ) { - int sign = quantCoeffSum > targetCoeffSumInt ? 1 : -1; - int diff = (quantCoeffSum - targetCoeffSumInt) * sign; - - if (diff > 4 * adjustedTotalCoeff) sign = sign * 8; - else if (diff > 2 * adjustedTotalCoeff) sign = sign * 4; - else if (diff > adjustedTotalCoeff) sign = sign * 2; - - double errMin = MAX_DOUBLE; - int minInd = -1; - - for (int k = 0; k < numCoeff - 1; k++) + modified=0; + for( int sign: {1, -1} ) { - memcpy(coeffQuantAdjust, filterCoeffQuant, sizeof(int) * numCoeff); - - coeffQuantAdjust[k] -= sign; + double errMin = MAX_DOUBLE; + int minInd = -1; - if (coeffQuantAdjust[k] <= max_value && coeffQuantAdjust[k] >= min_value) + for( int k = 0; k < numCoeff-1; k++ ) { - double error = calcErrorForCoeffs(E, y, coeffQuantAdjust, numCoeff, bitDepth); + if( filterCoeffQuant[k] - sign > max_value || filterCoeffQuant[k] - sign < min_value ) + continue; - if (error < errMin) + filterCoeffQuant[k] -= sign; + + double error = cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth ); + if( error < errMin ) { errMin = error; minInd = k; } + filterCoeffQuant[k] += sign; + } + if( errMin < errRef ) + { + filterCoeffQuant[minInd] -= sign; + modified++; + errRef = errMin; } } - - if (minInd != -1) - { - filterCoeffQuant[minInd] -= sign; - quantCoeffSum -= (weights[minInd] * sign); - } - - ++count; - } - - if (quantCoeffSum != targetCoeffSumInt) - { - memset(filterCoeffQuant, 0, sizeof(int) * numCoeff); - } - - for (int i = 0; i < numCoeff - 1; i++) - { - CHECK(filterCoeffQuant[i] > max_value || filterCoeffQuant[i] < min_value, "filterCoeffQuant[i]>max_value || filterCoeffQuant[i]<min_value"); - filterCoeff[i] = filterCoeffQuant[i] / double(factor); - } - CHECK(filterCoeffQuant[numCoeff - 1] > max_value_center || filterCoeffQuant[numCoeff - 1] < min_value_center, "filterCoeffQuant[numCoeff-1]>max_value_center || filterCoeffQuant[numCoeff-1]<min_value_center"); - filterCoeff[numCoeff - 1] 
= filterCoeffQuant[numCoeff - 1] / double(factor); - - - double error = calcErrorForCoeffs( E, y, filterCoeffQuant, numCoeff, bitDepth ); - return error; -} - -double EncAdaptiveLoopFilter::calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth ) -{ - double factor = 1 << ( bitDepth - 1 ); - double error = 0; - - for( int i = 0; i < numCoeff; i++ ) //diagonal - { - double sum = 0; - for( int j = i + 1; j < numCoeff; j++ ) - { - // E[j][i] = E[i][j], sum will be multiplied by 2 later - sum += E[i][j] * coeff[j]; - } - error += ( ( E[i][i] * coeff[i] + sum * 2 ) / factor - 2 * y[i] ) * coeff[i]; } - return error / factor; + return errRef; } void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor ) @@ -1249,8 +1604,12 @@ void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filte } } -void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] ) +void EncAdaptiveLoopFilter::mergeClasses( const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] ) { + static int tmpClip[MAX_NUM_ALF_LUMA_COEFF]; + static int bestMergeClip[MAX_NUM_ALF_LUMA_COEFF]; + static double err[MAX_NUM_ALF_CLASSES]; + static double bestMergeErr; static bool availableClass[MAX_NUM_ALF_CLASSES]; static uint8_t indexList[MAX_NUM_ALF_CLASSES]; static uint8_t indexListTemp[MAX_NUM_ALF_CLASSES]; @@ -1264,14 +1623,43 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov indexList[i] = i; availableClass[i] = true; covMerged[i] = cov[i]; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + covMerged[i].numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? 
AlfNumClippingValues[COMPONENT_Y] : 1; +#else + covMerged[i].numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? AlfNumClippingValues[COMPONENT_Y] : 1; +#endif } // Try merging different covariance matrices // temporal AlfCovariance structure is allocated as the last element in covMerged array, the size of covMerged is MAX_NUM_ALF_CLASSES + 1 AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES]; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + tmpCov.numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[COMPONENT_Y] : 1; +#else + tmpCov.numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? AlfNumClippingValues[COMPONENT_Y] : 1; +#endif + + // init Clip + for( int i = 0; i < numClasses; i++ ) + { +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + std::fill_n(clipMerged[numRemaining-1][i], MAX_NUM_ALF_LUMA_COEFF, m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0); + if ( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ) +#else + std::fill_n(clipMerged[numRemaining-1][i], MAX_NUM_ALF_LUMA_COEFF, m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? 
AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0); + if ( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ) +#endif + { + err[i] = covMerged[i].optimizeFilterClip( alfShape, clipMerged[numRemaining-1][i] ); + } + else + { + err[i] = covMerged[i].calculateError( clipMerged[numRemaining-1][i] ); + } + } - while( numRemaining > 2 ) + while( numRemaining >= 2 ) { double errorMin = std::numeric_limits<double>::max(); int bestToMergeIdx1 = 0, bestToMergeIdx2 = 1; @@ -1284,14 +1672,25 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov { if( availableClass[j] ) { - double error1 = calculateError( covMerged[i] ); - double error2 = calculateError( covMerged[j] ); + double error1 = err[i]; + double error2 = err[j]; tmpCov.add( covMerged[i], covMerged[j] ); - double error = calculateError( tmpCov ) - error1 - error2; + for( int l = 0; l < MAX_NUM_ALF_LUMA_COEFF; ++l ) + { + tmpClip[l] = (clipMerged[numRemaining-1][i][l] + clipMerged[numRemaining-1][j][l] + 1 ) >> 1; + } +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + double errorMerged = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? tmpCov.optimizeFilterClip(alfShape, tmpClip) : tmpCov.calculateError(tmpClip); +#else + double errorMerged = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? 
tmpCov.optimizeFilterClip( alfShape, tmpClip ) : tmpCov.calculateError( tmpClip ); +#endif + double error = errorMerged - error1 - error2; if( error < errorMin ) { + bestMergeErr = errorMerged; + memcpy(bestMergeClip, tmpClip, sizeof(bestMergeClip)); errorMin = error; bestToMergeIdx1 = i; bestToMergeIdx2 = j; @@ -1302,6 +1701,9 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov } covMerged[bestToMergeIdx1] += covMerged[bestToMergeIdx2]; + memcpy(clipMerged[numRemaining-2], clipMerged[numRemaining-1], sizeof(int[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF])); + memcpy(clipMerged[numRemaining-2][bestToMergeIdx1], bestMergeClip, sizeof(bestMergeClip)); + err[bestToMergeIdx1] = bestMergeErr; availableClass[bestToMergeIdx2] = false; for( int i = 0; i < numClasses; i++ ) @@ -1352,36 +1754,46 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov void EncAdaptiveLoopFilter::getFrameStats( ChannelType channel, int iShapeIdx ) { int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1; - for( int i = 0; i < numClasses; i++ ) - { - m_alfCovarianceFrame[channel][iShapeIdx][i].reset(); - } - if( isLuma( channel ) ) - { - getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][iShapeIdx], m_alfCovariance[COMPONENT_Y][iShapeIdx], m_ctuEnableFlag[COMPONENT_Y], numClasses ); - } - else + int numAlternatives = isLuma( channel ) ? 
1 : m_alfParamTemp.numAlternativesChroma; + // When calling this function m_ctuEnableFlag shall be set to 0 for CTUs using alternative APS + // Here we compute frame stats for building new alternative filters + for( int altIdx = 0; altIdx < numAlternatives; ++altIdx ) { - getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cb][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cb], numClasses ); - getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cr][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cr], numClasses ); + for( int i = 0; i < numClasses; i++ ) + { + m_alfCovarianceFrame[channel][iShapeIdx][isLuma( channel ) ? i : altIdx].reset(AlfNumClippingValues[channel]); + } + if( isLuma( channel ) ) + { + getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][iShapeIdx], m_alfCovariance[COMPONENT_Y][iShapeIdx], m_ctuEnableFlag[COMPONENT_Y], nullptr, numClasses, altIdx ); + } + else + { + getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cb][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cb], m_ctuAlternative[COMPONENT_Cb], numClasses, altIdx ); + getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cr][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cr], m_ctuAlternative[COMPONENT_Cr], numClasses, altIdx ); + } } } -void EncAdaptiveLoopFilter::getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, const int numClasses ) +void EncAdaptiveLoopFilter::getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, uint8_t* ctbAltIdx, const int numClasses, int altIdx ) { - for( int i = 0; i < m_numCTUsInPic; i++ ) + const ChannelType channel = (!ctbAltIdx ? 
CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA); + for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ ) { - if( ctbEnableFlags[i] ) + if( ctbEnableFlags[ctuIdx] ) { - for( int j = 0; j < numClasses; j++ ) + for( int classIdx = 0; classIdx < numClasses; classIdx++ ) { - frameCov[j] += ctbCov[i][j]; + if( isLuma( channel ) || altIdx == ctbAltIdx[ctuIdx] ) + { + frameCov[isLuma( channel ) ? classIdx : altIdx] += ctbCov[ctuIdx][classIdx]; + } } } } } -void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv ) +void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv, CodingStructure& cs ) { int ctuRsAddr = 0; const int numberOfComponents = getNumberValidComponents( m_chromaFormat ); @@ -1398,7 +1810,7 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit { for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ ) { - m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset(); + m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset(AlfNumClippingValues[toChannelType( compID )]); } } } @@ -1409,23 +1821,115 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit for( int channelIdx = 0; channelIdx < numberOfChannels; channelIdx++ ) { const ChannelType channelID = ChannelType( channelIdx ); + const int numAlts = channelID == CHANNEL_TYPE_LUMA ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA; const int numClasses = isLuma( channelID ) ? MAX_NUM_ALF_CLASSES : 1; + for( int altIdx = 0; altIdx < numAlts; ++altIdx ) for( int shape = 0; shape != m_filterShapes[channelIdx].size(); shape++ ) { for( int classIdx = 0; classIdx < numClasses; classIdx++ ) { - m_alfCovarianceFrame[channelIdx][shape][classIdx].reset(); + m_alfCovarianceFrame[channelIdx][shape][isLuma( channelID ) ? 
classIdx : altIdx].reset(AlfNumClippingValues[channelID]); } } } + const PreCalcValues& pcv = *cs.pcv; + bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { 0, 0, 0 }; + int verVirBndryPos[] = { 0, 0, 0 }; + for( int yPos = 0; yPos < m_picHeight; yPos += m_maxCUHeight ) { for( int xPos = 0; xPos < m_picWidth; xPos += m_maxCUWidth ) { const int width = ( xPos + m_maxCUWidth > m_picWidth ) ? ( m_picWidth - xPos ) : m_maxCUWidth; const int height = ( yPos + m_maxCUHeight > m_picHeight ) ? ( m_picHeight - yPos ) : m_maxCUHeight; + int rasterSliceAlfPad = 0; + if( isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) ) + { + int yStart = yPos; + for( int i = 0; i <= numHorVirBndry; i++ ) + { + const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i]; + const int h = yEnd - yStart; + const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 ); + const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight ); + int xStart = xPos; + for( int j = 0; j <= numVerVirBndry; j++ ) + { + const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j]; + const int w = xEnd - xStart; + const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 ); + const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth ); + const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); + const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); + PelUnitBuf recBuf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) ); + recBuf.copyFrom( recYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 
0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) ); + // pad top-left unavailable samples for raster slice + if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) ) + { + recBuf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 ); + } + + // pad bottom-right unavailable samples for raster slice + if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) ) + { + recBuf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 ); + } + recBuf.extendBorderPel( MAX_ALF_PADDING_SIZE ); + recBuf = recBuf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) ); + + const UnitArea area( m_chromaFormat, Area( 0, 0, w, h ) ); + const UnitArea areaDst( m_chromaFormat, Area( xStart, yStart, w, h ) ); + for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ ) + { + const ComponentID compID = ComponentID( compIdx ); + const CompArea& compArea = area.block( compID ); + + int recStride = recBuf.get( compID ).stride; + Pel* rec = recBuf.get( compID ).bufAt( compArea ); + + int orgStride = orgYuv.get(compID).stride; + Pel* org = orgYuv.get(compID).bufAt(xStart >> ::getComponentScaleX(compID, m_chromaFormat), yStart >> ::getComponentScaleY(compID, m_chromaFormat)); + ChannelType chType = toChannelType( compID ); + + for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ ) + { + const CompArea& compAreaDst = areaDst.block( compID ); + getBlkStats(m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compAreaDst, compArea, chType + , ((compIdx == 0) ? m_alfVBLumaCTUHeight : m_alfVBChmaCTUHeight) + , (compIdx == 0) ? 
m_alfVBLumaPos : m_alfVBChmaPos + ); + } + } + + xStart = xEnd; + } + + yStart = yEnd; + } + + for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ ) + { + const ComponentID compID = ComponentID( compIdx ); + + ChannelType chType = toChannelType( compID ); + + for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ ) + { + const int numClasses = isLuma( compID ) ? MAX_NUM_ALF_CLASSES : 1; + + for( int classIdx = 0; classIdx < numClasses; classIdx++ ) + { + m_alfCovarianceFrame[chType][shape][isLuma( compID ) ? classIdx : 0] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx]; + } + } + } + } + else + { const UnitArea area( m_chromaFormat, Area( xPos, yPos, width, height ) ); for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ ) @@ -1443,40 +1947,49 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ ) { - getBlkStats( m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea ); + getBlkStats(m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea, compArea, chType + , ((compIdx == 0) ? m_alfVBLumaCTUHeight : m_alfVBChmaCTUHeight) + , (compIdx == 0) ? m_alfVBLumaPos : m_alfVBChmaPos + ); + const int numClasses = isLuma( compID ) ? MAX_NUM_ALF_CLASSES : 1; for( int classIdx = 0; classIdx < numClasses; classIdx++ ) { - m_alfCovarianceFrame[chType][shape][classIdx] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx]; + m_alfCovarianceFrame[chType][shape][isLuma( compID ) ? 
classIdx : 0] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx]; } } } + } ctuRsAddr++; } } } -void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area ) +void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& areaDst, const CompArea& area, const ChannelType channel, int vbCTUHeight, int vbPos) + + { - static int ELocal[MAX_NUM_ALF_LUMA_COEFF]; + static int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues]; + const int numBins = AlfNumClippingValues[channel]; int transposeIdx = 0; int classIdx = 0; for( int i = 0; i < area.height; i++ ) { + int vbDistance = ((areaDst.y + i) % vbCTUHeight) - vbPos; for( int j = 0; j < area.width; j++ ) { - if( classifier && classifier[area.y + i][area.x + j].classIdx == m_ALF_UNUSED_CLASSIDX && classifier[area.y + i][area.x + j].transposeIdx == m_ALF_UNUSED_TRANSPOSIDX ) + if( classifier && classifier[areaDst.y + i][areaDst.x + j].classIdx == m_ALF_UNUSED_CLASSIDX && classifier[areaDst.y + i][areaDst.x + j].transposeIdx == m_ALF_UNUSED_TRANSPOSIDX ) { continue; } - std::memset( ELocal, 0, shape.numCoeff * sizeof( int ) ); + std::memset( ELocal, 0, sizeof( ELocal ) ); if( classifier ) { - AlfClassifier& cl = classifier[area.y + i][area.x + j]; + AlfClassifier& cl = classifier[areaDst.y + i][areaDst.x + j]; transposeIdx = cl.transposeIdx; classIdx = cl.classIdx; } @@ -1487,31 +2000,46 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF weight = m_lumaLevelToWeightPLUT[org[j]]; } int yLocal = org[j] - rec[j]; - calcCovariance( ELocal, rec + j, recStride, shape.pattern.data(), shape.filterLength >> 1, transposeIdx ); + calcCovariance(ELocal, rec + j, recStride, shape, transposeIdx, channel, 
vbDistance); for( int k = 0; k < shape.numCoeff; k++ ) { for( int l = k; l < shape.numCoeff; l++ ) { - if (m_alfWSSD) + for( int b0 = 0; b0 < numBins; b0++ ) { - alfCovariace[classIdx].E[k][l] += weight * (double)(ELocal[k] * ELocal[l]); + for( int b1 = 0; b1 < numBins; b1++ ) + { + if (m_alfWSSD) + { + alfCovariance[classIdx].E[b0][b1][k][l] += weight * (double)(ELocal[k][b0] * ELocal[l][b1]); + } + else + { + alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * ELocal[l][b1]; + } + } } - else - alfCovariace[classIdx].E[k][l] += ELocal[k] * ELocal[l]; } - if (m_alfWSSD) + for( int b = 0; b < numBins; b++ ) { - alfCovariace[classIdx].y[k] += weight * (double)(ELocal[k] * yLocal); + if (m_alfWSSD) + { + alfCovariance[classIdx].y[b][k] += weight * (double)(ELocal[k][b] * yLocal); + } + else + { + alfCovariance[classIdx].y[b][k] += ELocal[k][b] * yLocal; + } } - else - alfCovariace[classIdx].y[k] += ELocal[k] * yLocal; } if (m_alfWSSD) { - alfCovariace[classIdx].pixAcc += weight * (double)(yLocal * yLocal); + alfCovariance[classIdx].pixAcc += weight * (double)(yLocal * yLocal); } else - alfCovariace[classIdx].pixAcc += yLocal * yLocal; + { + alfCovariance[classIdx].pixAcc += yLocal * yLocal; + } } org += orgStride; rec += recStride; @@ -1524,31 +2052,61 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF { for( int l = 0; l < k; l++ ) { - alfCovariace[classIdx].E[k][l] = alfCovariace[classIdx].E[l][k]; + for( int b0 = 0; b0 < numBins; b0++ ) + { + for( int b1 = 0; b1 < numBins; b1++ ) + { + alfCovariance[classIdx].E[b0][b1][k][l] = alfCovariance[classIdx].E[b1][b0][l][k]; + } + } } } } } -void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx ) +void EncAdaptiveLoopFilter::calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int 
transposeIdx, const ChannelType channel, int vbDistance) { + int clipTopRow = -4; + int clipBotRow = 4; + if (vbDistance >= -3 && vbDistance < 0) + { + clipBotRow = -vbDistance - 1; + clipTopRow = -clipBotRow; // symmetric + } + else if (vbDistance >= 0 && vbDistance < 3) + { + clipTopRow = -vbDistance; + clipBotRow = -clipTopRow; // symmetric + } + const int *filterPattern = shape.pattern.data(); + const int halfFilterLength = shape.filterLength >> 1; + const Pel* clip = m_alfClippingValues[channel]; + const int numBins = AlfNumClippingValues[channel]; + int k = 0; + const short curr = rec[0]; + if( transposeIdx == 0 ) { for( int i = -halfFilterLength; i < 0; i++ ) { - const Pel* rec0 = rec + i * stride; - const Pel* rec1 = rec - i * stride; - - for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++ ) + const Pel* rec0 = rec + std::max(i, clipTopRow) * stride; + const Pel* rec1 = rec - std::max(i, -clipBotRow) * stride; + for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++, k++ ) { - ELocal[filterPattern[k++]] += rec0[j] + rec1[-j]; + for( int b = 0; b < numBins; b++ ) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]); + } } } - for( int j = -halfFilterLength; j < 0; j++ ) + for( int j = -halfFilterLength; j < 0; j++, k++ ) { - ELocal[filterPattern[k++]] += rec[j] + rec[-j]; + for( int b = 0; b < numBins; b++ ) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]); + } } } else if( transposeIdx == 1 ) @@ -1557,32 +2115,43 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i { const Pel* rec0 = rec + j; const Pel* rec1 = rec - j; - - for( int i = -halfFilterLength - j; i <= halfFilterLength + j; i++ ) + for (int i = -halfFilterLength - j; i <= halfFilterLength + j; i++, k++) { - ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride]; - } + for (int b = 0; b < numBins; b++) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, 
rec0[std::max(i, clipTopRow) * stride], rec1[-std::max(i, -clipBotRow) * stride]); + } } - for( int i = -halfFilterLength; i < 0; i++ ) + } + for (int i = -halfFilterLength; i < 0; i++, k++) { - ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride]; + for (int b = 0; b < numBins; b++) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[std::max(i, clipTopRow) * stride], rec[-std::max(i, -clipBotRow) * stride]); + } } } else if( transposeIdx == 2 ) { for( int i = -halfFilterLength; i < 0; i++ ) { - const Pel* rec0 = rec + i * stride; - const Pel* rec1 = rec - i * stride; + const Pel* rec0 = rec + std::max(i, clipTopRow) * stride; + const Pel* rec1 = rec - std::max(i, -clipBotRow) * stride; - for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j-- ) + for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j--, k++ ) { - ELocal[filterPattern[k++]] += rec0[j] + rec1[-j]; + for( int b = 0; b < numBins; b++ ) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]); + } } } - for( int j = -halfFilterLength; j < 0; j++ ) + for( int j = -halfFilterLength; j < 0; j++, k++ ) { - ELocal[filterPattern[k++]] += rec[j] + rec[-j]; + for( int b = 0; b < numBins; b++ ) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]); + } } } else @@ -1591,221 +2160,814 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i { const Pel* rec0 = rec + j; const Pel* rec1 = rec - j; - - for( int i = halfFilterLength + j; i >= -halfFilterLength - j; i-- ) + for (int i = halfFilterLength + j; i >= -halfFilterLength - j; i--, k++) { - ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride]; + for (int b = 0; b < numBins; b++) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[std::max(i, clipTopRow) * stride], rec1[-std::max(i, -clipBotRow) * stride]); + } } } - for( int i = -halfFilterLength; i < 0; i++ ) + for (int i = -halfFilterLength; i < 0; i++, k++) { - 
ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride]; + for (int b = 0; b < numBins; b++) + { + ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[std::max(i, clipTopRow) * stride], rec[-std::max(i, -clipBotRow) * stride]); + } } - } - ELocal[filterPattern[k++]] += rec[0]; -} - - -double EncAdaptiveLoopFilter::calculateError( AlfCovariance& cov ) -{ - static double c[MAX_NUM_ALF_COEFF]; - - gnsSolveByChol( cov.E, cov.y, c, cov.numCoeff ); - - double sum = 0; - for( int i = 0; i < cov.numCoeff; i++ ) + } + for( int b = 0; b < numBins; b++ ) { - sum += c[i] * cov.y[i]; + ELocal[filterPattern[k]][b] += curr; } - - return cov.pixAcc - sum; } -//******************************** -// Cholesky decomposition -//******************************** -#define ROUND(a) (((a) < 0)? (int)((a) - 0.5) : (int)((a) + 0.5)) -#define REG 0.0001 -#define REG_SQR 0.0000001 -//Find filter coeff related -int EncAdaptiveLoopFilter::gnsCholeskyDec( double **inpMatr, double outMatr[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], int numEq ) +void EncAdaptiveLoopFilter::setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, bool val ) { - static double invDiag[MAX_NUM_ALF_COEFF]; /* Vector of the inverse of diagonal entries of outMatr */ + if( channel == CHANNEL_TYPE_LUMA ) + { + alfSlicePara.enabledFlag[COMPONENT_Y] = val; + } + else + { + alfSlicePara.enabledFlag[COMPONENT_Cb] = alfSlicePara.enabledFlag[COMPONENT_Cr] = val; + } +} - for( int i = 0; i < numEq; i++ ) +void EncAdaptiveLoopFilter::setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags ) +{ + const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb; + const ComponentID compIDLast = isLuma( channel ) ? 
COMPONENT_Y : COMPONENT_Cr; + for( int compId = compIDFirst; compId <= compIDLast; compId++ ) { - for( int j = i; j < numEq; j++ ) + alfSlicePara.enabledFlag[compId] = false; + for( int i = 0; i < m_numCTUsInPic; i++ ) { - /* Compute the scaling factor */ - double scale = inpMatr[i][j]; - if( i > 0 ) - { - for( int k = i - 1; k >= 0; k-- ) - { - scale -= outMatr[k][j] * outMatr[k][i]; - } - } - - /* Compute i'th row of outMatr */ - if( i == j ) - { - if( scale <= REG_SQR ) // if(scale <= 0 ) /* If inpMatr is singular */ - { - return 0; - } - else /* Normal operation */ - invDiag[i] = 1.0 / ( outMatr[i][i] = sqrt( scale ) ); - } - else + if( ctuFlags[compId][i] ) { - outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part */ - outMatr[j][i] = 0.0; /* Lower triangular part set to 0 */ + alfSlicePara.enabledFlag[compId] = true; + break; } } } - return 1; /* Signal that Cholesky factorization is successfully performed */ } -void EncAdaptiveLoopFilter::gnsTransposeBacksubstitution( double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* rhs, double* x, int order ) +void EncAdaptiveLoopFilter::copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel ) { - /* Backsubstitution starts */ - x[0] = rhs[0] / U[0][0]; /* First row of U' */ - for( int i = 1; i < order; i++ ) - { /* For the rows 1..order-1 */ - - double sum = 0; //Holds backsubstitution from already handled rows - - for( int j = 0; j < i; j++ ) /* Backsubst already solved unknowns */ - { - sum += x[j] * U[j][i]; - } + if( isLuma( channel ) ) + { + memcpy( ctuFlagsDst[COMPONENT_Y], ctuFlagsSrc[COMPONENT_Y], sizeof( uint8_t ) * m_numCTUsInPic ); + } + else + { + memcpy( ctuFlagsDst[COMPONENT_Cb], ctuFlagsSrc[COMPONENT_Cb], sizeof( uint8_t ) * m_numCTUsInPic ); + memcpy( ctuFlagsDst[COMPONENT_Cr], ctuFlagsSrc[COMPONENT_Cr], sizeof( uint8_t ) * m_numCTUsInPic ); + } +} - x[i] = ( rhs[i] - sum ) / U[i][i]; /* i'th component of solution vect. 
*/ +void EncAdaptiveLoopFilter::setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val ) +{ + if( isLuma( channel ) ) + { + memset( ctuFlags[COMPONENT_Y], val, sizeof( uint8_t ) * m_numCTUsInPic ); + } + else + { + memset( ctuFlags[COMPONENT_Cb], val, sizeof( uint8_t ) * m_numCTUsInPic ); + memset( ctuFlags[COMPONENT_Cr], val, sizeof( uint8_t ) * m_numCTUsInPic ); } } -void EncAdaptiveLoopFilter::gnsBacksubstitution( double R[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* z, int size, double* A ) +std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, int &newApsId) { - size--; - A[size] = z[size] / R[size][size]; + APS** apss = cs.slice->getAlfAPSs(); + for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) + { + apss[i] = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS); + } - for( int i = size - 1; i >= 0; i-- ) + std::vector<int> result; + int apsIdChecked = 0, curApsId = m_apsIdStart; + if (curApsId < ALF_CTB_MAX_NUM_APS) { - double sum = 0; + while (apsIdChecked < ALF_CTB_MAX_NUM_APS && !cs.slice->isIntra() && result.size() < ALF_CTB_MAX_NUM_APS && !cs.slice->getPendingRasInit() && !cs.slice->isIDRorBLA()) + { + APS* curAPS = cs.slice->getAlfAPSs()[curApsId]; - for( int j = i + 1; j <= size; j++ ) + if( curAPS && curAPS->getLayerId() == cs.slice->getPic()->layerId && curAPS->getTemporalId() <= cs.slice->getTLayer() && curAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] ) + { + result.push_back(curApsId); + } + apsIdChecked++; + curApsId = (curApsId + 1) % ALF_CTB_MAX_NUM_APS; + } + } + cs.slice->setTileGroupNumAps((int)result.size()); + cs.slice->setAlfAPSs(result); + newApsId = m_apsIdStart - 1; + if (newApsId < 0) + { + newApsId = ALF_CTB_MAX_NUM_APS - 1; + } + CHECK(newApsId >= ALF_CTB_MAX_NUM_APS, "Wrong APS index assignment in getAvaiApsIdsLuma"); + return result; +} +void EncAdaptiveLoopFilter::initDistortion() +{ + for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) + { + for (int ctbIdx = 0; ctbIdx < 
m_numCTUsInPic; ctbIdx++) { - sum += R[i][j] * A[j]; + m_ctbDistortionUnfilter[comp][ctbIdx] = getUnfilteredDistortion(m_alfCovariance[comp][0][ctbIdx], comp == 0 ? MAX_NUM_ALF_CLASSES : 1); } - - A[i] = ( z[i] - sum ) / R[i][i]; } } - -int EncAdaptiveLoopFilter::gnsSolveByChol( double **LHS, double *rhs, double *x, int numEq ) +void EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfParamNewFilters +#if ENABLE_QPA + , const double lambdaChromaWeight +#endif +) { - static double aux[MAX_NUM_ALF_COEFF]; /* Auxiliary vector */ - static double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF]; /* Upper triangular Cholesky factor of LHS */ - int res = 1; // Signal that Cholesky factorization is successfully performed - - /* The equation to be solved is LHSx = rhs */ + TempCtx ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx())); + TempCtx ctxBest(m_CtxCache); + TempCtx ctxTempStart(m_CtxCache); + TempCtx ctxTempBest(m_CtxCache); + TempCtx ctxTempAltStart( m_CtxCache ); + TempCtx ctxTempAltBest( m_CtxCache ); + AlfParam alfParamNewFiltersBest = alfParamNewFilters; + APS** apss = cs.slice->getAlfAPSs(); + short* alfCtbFilterSetIndex = cs.picture->getAlfCtbFilterIndex(); + bool hasNewFilters[2] = { alfParamNewFilters.enabledFlag[COMPONENT_Y] , alfParamNewFilters.enabledFlag[COMPONENT_Cb] || alfParamNewFilters.enabledFlag[COMPONENT_Cr] }; + initDistortion(); + + //luma + m_alfParamTemp = alfParamNewFilters; + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 1); + getFrameStats(CHANNEL_TYPE_LUMA, 0); + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 0); + double costOff = getUnfilteredDistortion(m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][0], CHANNEL_TYPE_LUMA); + + int newApsId; + std::vector<int> apsIds = getAvaiApsIdsLuma(cs, newApsId); + std::vector<int> bestApsIds; + double costMin = MAX_DOUBLE; + reconstructCoeffAPSs(cs, true, false, true); - /* Compute upper triangular U such that U'*U = LHS */ - if( gnsCholeskyDec( LHS, U, numEq ) ) /* If Cholesky 
decomposition has been successful */ + int numLoops = hasNewFilters[CHANNEL_TYPE_LUMA] ? 2 : 1; + for (int useNewFilter = 0; useNewFilter < numLoops; useNewFilter++) { - /* Now, the equation is U'*U*x = rhs, where U is upper triangular - * Solve U'*aux = rhs for aux - */ - gnsTransposeBacksubstitution( U, rhs, aux, numEq ); + int bitsNewFilter = 0; + if (useNewFilter == 1) + { + if (!hasNewFilters[CHANNEL_TYPE_LUMA]) + { + continue; + } + else + { + bitsNewFilter = m_bitsNewFilter[CHANNEL_TYPE_LUMA]; + reconstructCoeff(alfParamNewFilters, CHANNEL_TYPE_LUMA, true, true); + } + } + int numIter = useNewFilter ? 2 : 1; + for (int numTemporalAps = 0; numTemporalAps <= apsIds.size(); numTemporalAps++) + { + if (numTemporalAps + useNewFilter >= ALF_CTB_MAX_NUM_APS) + { + continue; + } + cs.slice->setTileGroupNumAps(numTemporalAps + useNewFilter); + int numFilterSet = NUM_FIXED_FILTER_SETS + numTemporalAps + useNewFilter; + if (numTemporalAps == apsIds.size() && numTemporalAps > 0 && useNewFilter && newApsId == apsIds.back()) //last temporalAPS is occupied by new filter set and this temporal APS becomes unavailable + { + continue; + } + for (int iter = 0; iter < numIter; iter++) + { + m_alfParamTemp = alfParamNewFilters; + m_alfParamTemp.enabledFlag[CHANNEL_TYPE_LUMA] = true; + double curCost = 3 * m_lambda[CHANNEL_TYPE_LUMA]; + if (iter > 0) //re-derive new filter-set + { + double dDistOrgNewFilter = 0; + int blocksUsingNewFilter = 0; + for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++) + { + if (m_ctuEnableFlag[COMPONENT_Y][ctbIdx] && alfCtbFilterSetIndex[ctbIdx] != NUM_FIXED_FILTER_SETS) + { + m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0; + } + else if (m_ctuEnableFlag[COMPONENT_Y][ctbIdx] && alfCtbFilterSetIndex[ctbIdx] == NUM_FIXED_FILTER_SETS) + { + blocksUsingNewFilter++; + dDistOrgNewFilter += m_ctbDistortionUnfilter[COMPONENT_Y][ctbIdx]; + for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++) + { + short* pCoeff = m_coeffFinal; + short* pClipp = 
m_clippFinal; + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++) + { + m_filterTmp[i] = pCoeff[classIdx * MAX_NUM_ALF_LUMA_COEFF + i]; + m_clipTmp[i] = pClipp[classIdx * MAX_NUM_ALF_LUMA_COEFF + i]; + } + dDistOrgNewFilter += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipTmp, m_filterTmp, MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS); + } + } + } + if (blocksUsingNewFilter > 0 && blocksUsingNewFilter < m_numCTUsInPic) + { + int bitNL[2] = { 0, 0 }; + double errNL[2] = { 0.0, 0.0 }; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] = 1; +#else + m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = 1; +#endif + if (m_encCfg->getUseNonLinearAlfLuma()) + { + errNL[1] = getFilterCoeffAndCost(cs, 0, CHANNEL_TYPE_LUMA, true, 0, bitNL[1], true); + m_alfParamTempNL = m_alfParamTemp; + } + else + { + errNL[1] = MAX_DOUBLE; + } +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] = 0; +#else + m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = 0; +#endif + errNL[0] = getFilterCoeffAndCost(cs, 0, CHANNEL_TYPE_LUMA, true, 0, bitNL[0], true); - /* The equation is now U*x = aux, solve it for x (new motion coefficients) */ - gnsBacksubstitution( U, aux, numEq, x ); + int bitsNewFilterTempLuma = bitNL[0]; + double err = errNL[0]; + if (errNL[1] < errNL[0]) + { + err = errNL[1]; + bitsNewFilterTempLuma = bitNL[1]; + m_alfParamTemp = m_alfParamTempNL; + } + if (dDistOrgNewFilter + m_lambda[CHANNEL_TYPE_LUMA] * m_bitsNewFilter[CHANNEL_TYPE_LUMA] < err) //re-derived filter is not good, skip + { + continue; + } + reconstructCoeff(m_alfParamTemp, CHANNEL_TYPE_LUMA, true, true); + bitsNewFilter = bitsNewFilterTempLuma; + } + else //no blocks using new filter, skip + { + continue; + } + } + m_CABACEstimator->getCtx() = ctxStart; + for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++) + { + double distUnfilterCtb = m_ctbDistortionUnfilter[COMPONENT_Y][ctbIdx]; + //ctb on + 
m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 1; + double costOn = MAX_DOUBLE; + ctxTempStart = AlfCtx(m_CABACEstimator->getCtx()); + int iBestFilterSetIdx = 0; + for (int filterSetIdx = 0; filterSetIdx < numFilterSet; filterSetIdx++) + { + //rate + m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); + m_CABACEstimator->resetBits(); + m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfParamTemp); + alfCtbFilterSetIndex[ctbIdx] = filterSetIdx; + m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctbIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]); + double rateOn = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + //distortion + double dist = distUnfilterCtb; + for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++) + { + if (filterSetIdx < NUM_FIXED_FILTER_SETS) + { + int filterIdx = m_classToFilterMapping[filterSetIdx][classIdx]; + dist += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipDefaultEnc, m_fixedFilterSetCoeff[filterIdx], MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS); + } + else + { + short *pCoeff; + short *pClipp; + if (useNewFilter && filterSetIdx == NUM_FIXED_FILTER_SETS) + { + pCoeff = m_coeffFinal; + pClipp = m_clippFinal; + } + else if (useNewFilter) + { + pCoeff = m_coeffApsLuma[filterSetIdx - 1 - NUM_FIXED_FILTER_SETS]; + pClipp = m_clippApsLuma[filterSetIdx - 1 - NUM_FIXED_FILTER_SETS]; + } + else + { + pCoeff = m_coeffApsLuma[filterSetIdx - NUM_FIXED_FILTER_SETS]; + pClipp = m_clippApsLuma[filterSetIdx - NUM_FIXED_FILTER_SETS]; + } + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++) + { + m_filterTmp[i] = pCoeff[classIdx * MAX_NUM_ALF_LUMA_COEFF + i]; + m_clipTmp[i] = pClipp[classIdx * MAX_NUM_ALF_LUMA_COEFF + i]; + } + dist += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipTmp, m_filterTmp, MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS); + } + } + //cost + double costOnTmp = dist + m_lambda[COMPONENT_Y] * rateOn; + if (costOnTmp < costOn) + { + ctxTempBest = 
AlfCtx(m_CABACEstimator->getCtx()); + costOn = costOnTmp; + iBestFilterSetIdx = filterSetIdx; + } + } + //ctb off + m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0; + //rate + m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); + m_CABACEstimator->resetBits(); + m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfParamTemp); + //cost + double costOff = + distUnfilterCtb + m_lambda[COMPONENT_Y] * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + if (costOn < costOff) + { + m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest); + m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 1; + alfCtbFilterSetIndex[ctbIdx] = iBestFilterSetIdx; + curCost += costOn; + } + else + { + m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0; + curCost += costOff; + } + } //for(ctbIdx) + int tmpBits = bitsNewFilter + 3 * (numFilterSet - NUM_FIXED_FILTER_SETS); + curCost += tmpBits * m_lambda[COMPONENT_Y]; + if (curCost < costMin) + { + costMin = curCost; + bestApsIds.resize(numFilterSet - NUM_FIXED_FILTER_SETS); + for (int i = 0; i < bestApsIds.size(); i++) + { + if (i == 0 && useNewFilter) + { + bestApsIds[i] = newApsId; + } + else + { + bestApsIds[i] = apsIds[i - useNewFilter]; + } + } + alfParamNewFiltersBest = m_alfParamTemp; + ctxBest = AlfCtx(m_CABACEstimator->getCtx()); + copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_LUMA); + for (int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++) + { + m_alfCtbFilterSetIndexTmp[ctuIdx] = alfCtbFilterSetIndex[ctuIdx]; + } + alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA] = useNewFilter; + } + }//for (int iter = 0; iter < numIter; iter++) + }// for (int numTemporalAps = 0; numTemporalAps < apsIds.size(); numTemporalAps++) + }//for (int useNewFilter = 0; useNewFilter <= 1; useNewFilter++) + + if (costOff <= costMin) + { + cs.slice->resetTileGroupAlfEnabledFlag(); + cs.slice->setTileGroupNumAps(0); + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 0); + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0); + return; } - 
else /* LHS was singular */ + else { - res = 0; + cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Y, true); + cs.slice->setTileGroupNumAps((int)bestApsIds.size()); + cs.slice->setAlfAPSs(bestApsIds); + copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_LUMA); + for (int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++) + { + alfCtbFilterSetIndex[ctuIdx] = m_alfCtbFilterSetIndexTmp[ctuIdx]; + } + if (alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA]) + { + APS* newAPS = m_apsMap->getPS((newApsId << NUM_APS_TYPE_LEN) + ALF_APS); + if (newAPS == NULL) + { + newAPS = m_apsMap->allocatePS((newApsId << NUM_APS_TYPE_LEN) + ALF_APS); + newAPS->setAPSId(newApsId); + newAPS->setAPSType(ALF_APS); + } + newAPS->setAlfAPSParam(alfParamNewFiltersBest); + newAPS->setTemporalId( cs.slice->getTLayer() ); + newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = false; + m_apsMap->setChangedFlag((newApsId << NUM_APS_TYPE_LEN) + ALF_APS); + m_apsIdStart = newApsId; + } - /* Regularize LHS */ - for( int i = 0; i < numEq; i++ ) + std::vector<int> apsIds = cs.slice->getTileGroupApsIdLuma(); + for (int i = 0; i < (int)cs.slice->getTileGroupNumAps(); i++) { - LHS[i][i] += REG; + apss[apsIds[i]] = m_apsMap->getPS((apsIds[i] << NUM_APS_TYPE_LEN) + ALF_APS); } + } - /* Compute upper triangular U such that U'*U = regularized LHS */ - res = gnsCholeskyDec( LHS, U, numEq ); + //chroma + m_alfParamTemp = alfParamNewFiltersBest; + if( m_alfParamTemp.numAlternativesChroma < 1 ) + { + m_alfParamTemp.numAlternativesChroma = 1; + } + setCtuAlternativeChroma( m_ctuAlternative, 0 ); + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 1); + getFrameStats(CHANNEL_TYPE_CHROMA, 0); + costOff = getUnfilteredDistortion(m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][0], CHANNEL_TYPE_CHROMA); + costMin = MAX_DOUBLE; + m_CABACEstimator->getCtx() = AlfCtx(ctxBest); + ctxStart = AlfCtx(m_CABACEstimator->getCtx()); + int newApsIdChroma = -1; + if 
(alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA] && (alfParamNewFiltersBest.enabledFlag[COMPONENT_Cb] || alfParamNewFiltersBest.enabledFlag[COMPONENT_Cr])) + { + newApsIdChroma = newApsId; + } + else if (alfParamNewFiltersBest.enabledFlag[COMPONENT_Cb] || alfParamNewFiltersBest.enabledFlag[COMPONENT_Cr]) + { + int curId = m_apsIdStart; + while (newApsIdChroma < 0) + { + curId--; + if (curId < 0) + { + curId = ALF_CTB_MAX_NUM_APS - 1; + } + if (std::find(bestApsIds.begin(), bestApsIds.end(), curId) == bestApsIds.end()) + { + newApsIdChroma = curId; + } + } + } + for (int curApsId = 0; curApsId < ALF_CTB_MAX_NUM_APS; curApsId++) + { + if ((cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() || cs.slice->isIntra()) && curApsId != newApsIdChroma) + { + continue; + } + APS* curAPS = m_apsMap->getPS((curApsId << NUM_APS_TYPE_LEN) + ALF_APS); - if( !res ) + if( curAPS && curAPS->getLayerId() != cs.slice->getPic()->layerId ) { - std::memset( x, 0, sizeof( double )*numEq ); - return 0; + continue; } - /* Solve U'*aux = rhs for aux */ - gnsTransposeBacksubstitution( U, rhs, aux, numEq ); + double curCost = m_lambda[CHANNEL_TYPE_CHROMA] * 3; + if (curApsId == newApsIdChroma) + { + m_alfParamTemp = alfParamNewFilters; + curCost += m_lambda[CHANNEL_TYPE_CHROMA] * m_bitsNewFilter[CHANNEL_TYPE_CHROMA]; + } + else if (curAPS && curAPS->getTemporalId() <= cs.slice->getTLayer() && curAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA]) + { + m_alfParamTemp = curAPS->getAlfAPSParam(); + } + else + { + continue; + } + reconstructCoeff(m_alfParamTemp, CHANNEL_TYPE_CHROMA, true, true); + m_CABACEstimator->getCtx() = AlfCtx(ctxStart); + for (int compId = 1; compId < MAX_NUM_COMPONENT; compId++) + { + m_alfParamTemp.enabledFlag[compId] = true; + for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++) + { + double distUnfilterCtu = m_ctbDistortionUnfilter[compId][ctbIdx]; + //cost on + m_ctuEnableFlag[compId][ctbIdx] = 1; + ctxTempStart = 
AlfCtx(m_CABACEstimator->getCtx()); + //rate + m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); + m_CABACEstimator->resetBits(); + //ctb flag + m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, compId, &m_alfParamTemp); + double rateOn = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); +#if ENABLE_QPA + const double ctuLambda = lambdaChromaWeight > 0.0 ? cs.picture->m_uEnerHpCtu[ctbIdx] / lambdaChromaWeight : m_lambda[compId]; +#else + const double ctuLambda = m_lambda[compId]; +#endif + double dist = MAX_DOUBLE; + int numAlts = m_alfParamTemp.numAlternativesChroma; + ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() ); + double bestAltRate = 0; + double bestAltCost = MAX_DOUBLE; + int bestAltIdx = -1; + ctxTempAltStart = AlfCtx( ctxTempBest ); + for( int altIdx = 0; altIdx < numAlts; ++altIdx ) + { + if( altIdx ) + m_CABACEstimator->getCtx() = AlfCtx( ctxTempAltStart ); + m_CABACEstimator->resetBits(); + m_ctuAlternative[compId][ctbIdx] = altIdx; + m_CABACEstimator->codeAlfCtuAlternative( cs, ctbIdx, compId, &m_alfParamTemp ); + double altRate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + double r_altCost = ctuLambda * altRate; + + //distortion + for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) + { + m_filterTmp[i] = m_chromaCoeffFinal[altIdx][i]; + m_clipTmp[i] = m_chromaClippFinal[altIdx][i]; + } + double altDist = m_alfCovariance[compId][0][ctbIdx][0].calcErrorForCoeffs( m_clipTmp, m_filterTmp, MAX_NUM_ALF_CHROMA_COEFF, m_NUM_BITS ); + double altCost = altDist + r_altCost; + if( altCost < bestAltCost ) + { + bestAltCost = altCost; + bestAltIdx = altIdx; + bestAltRate = altRate; + ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() ); + dist = altDist; + } + } + m_ctuAlternative[compId][ctbIdx] = bestAltIdx; + rateOn += bestAltRate; + dist += distUnfilterCtu; + //cost + double costOn = dist + ctuLambda * rateOn; + //cost off + m_ctuEnableFlag[compId][ctbIdx] = 0; + //rate + m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); + 
m_CABACEstimator->resetBits(); + m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, compId, &m_alfParamTemp); + //cost + double costOff = distUnfilterCtu + m_lambda[compId] * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); + if (costOn < costOff) + { + m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest); + m_ctuEnableFlag[compId][ctbIdx] = 1; + curCost += costOn; + } + else + { + m_ctuEnableFlag[compId][ctbIdx] = 0; + curCost += costOff; + } + } + } + //chroma idc + setEnableFlag(m_alfParamTemp, CHANNEL_TYPE_CHROMA, m_ctuEnableFlag); - /* Solve U*x = aux for x */ - gnsBacksubstitution( U, aux, numEq, x ); + if (curCost < costMin) + { + costMin = curCost; + cs.slice->setTileGroupApsIdChroma(curApsId); + cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, m_alfParamTemp.enabledFlag[COMPONENT_Cb]); + cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, m_alfParamTemp.enabledFlag[COMPONENT_Cr]); + copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_CHROMA); + copyCtuAlternativeChroma(m_ctuAlternativeTmp, m_ctuAlternative); + } } - return res; -} -////////////////////////////////////////////////////////////////////////////////////////// -void EncAdaptiveLoopFilter::setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val ) -{ - if( channel == CHANNEL_TYPE_LUMA ) + if (costOff < costMin) { - alfSlicePara.enabledFlag[COMPONENT_Y] = val; + cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, false); + cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, false); + setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0); } else { - alfSlicePara.enabledFlag[COMPONENT_Cb] = alfSlicePara.enabledFlag[COMPONENT_Cr] = val; + copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_CHROMA); + copyCtuAlternativeChroma(m_ctuAlternative, m_ctuAlternativeTmp); + if (cs.slice->getTileGroupApsIdChroma() == newApsIdChroma) //new filter + { + APS* newAPS = m_apsMap->getPS((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS); + if (newAPS == NULL) + { + 
newAPS = m_apsMap->allocatePS((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS); + newAPS->setAPSType(ALF_APS); + newAPS->setAPSId(newApsIdChroma); + newAPS->getAlfAPSParam().reset(); + } + newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = true; + if (!alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA]) + { + newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] = false; + } + newAPS->getAlfAPSParam().numAlternativesChroma = alfParamNewFilters.numAlternativesChroma; +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + newAPS->getAlfAPSParam().nonLinearFlag[CHANNEL_TYPE_CHROMA] = alfParamNewFilters.nonLinearFlag[CHANNEL_TYPE_CHROMA]; +#else + for( int altIdx = 0; altIdx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++altIdx ) + newAPS->getAlfAPSParam().nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] = alfParamNewFilters.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx]; +#endif + newAPS->setTemporalId( cs.slice->getTLayer() ); + for (int altIdx = 0; altIdx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++altIdx ) + for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) + { + newAPS->getAlfAPSParam().chromaCoeff[altIdx][i] = alfParamNewFilters.chromaCoeff[altIdx][i]; + newAPS->getAlfAPSParam().chromaClipp[altIdx][i] = alfParamNewFilters.chromaClipp[altIdx][i]; + } + m_apsMap->setChangedFlag((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS); + m_apsIdStart = newApsIdChroma; + } + apss[cs.slice->getTileGroupApsIdChroma()] = m_apsMap->getPS((cs.slice->getTileGroupApsIdChroma() << NUM_APS_TYPE_LEN) + ALF_APS); } } -void EncAdaptiveLoopFilter::setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags ) +void EncAdaptiveLoopFilter::alfReconstructor(CodingStructure& cs, const PelUnitBuf& recExtBuf) { - const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb; - const ComponentID compIDLast = isLuma( channel ) ? 
COMPONENT_Y : COMPONENT_Cr; - for( int compId = compIDFirst; compId <= compIDLast; compId++ ) + if (!cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)) { - alfSlicePara.enabledFlag[compId] = false; - for( int i = 0; i < m_numCTUsInPic; i++ ) + return; + } + reconstructCoeffAPSs(cs, true, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false); + short* alfCtuFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex(); + PelUnitBuf& recBuf = cs.getRecoBufRef(); + const PreCalcValues& pcv = *cs.pcv; + + int ctuIdx = 0; + bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false; + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { 0, 0, 0 }; + int verVirBndryPos[] = { 0, 0, 0 }; + for (int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight) + { + for (int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth) { - if( ctuFlags[compId][i] ) + const int width = (xPos + pcv.maxCUWidth > pcv.lumaWidth) ? (pcv.lumaWidth - xPos) : pcv.maxCUWidth; + const int height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight; + + bool ctuEnableFlag = m_ctuEnableFlag[COMPONENT_Y][ctuIdx]; + for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++) { - alfSlicePara.enabledFlag[compId] = true; - break; + ctuEnableFlag |= m_ctuEnableFlag[compIdx][ctuIdx] > 0; + } + int rasterSliceAlfPad = 0; + if ( ctuEnableFlag && isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) ) + { + int yStart = yPos; + for (int i = 0; i <= numHorVirBndry; i++) + { + const int yEnd = i == numHorVirBndry ? 
yPos + height : horVirBndryPos[i]; + const int h = yEnd - yStart; + const bool clipT = (i == 0 && clipTop) || (i > 0) || (yStart == 0); + const bool clipB = (i == numHorVirBndry && clipBottom) || (i < numHorVirBndry ) || (yEnd == pcv.lumaHeight); + int xStart = xPos; + for (int j = 0; j <= numVerVirBndry; j++) + { + const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j]; + const int w = xEnd - xStart; + const bool clipL = (j == 0 && clipLeft) || (j > 0) || (xStart == 0); + const bool clipR = (j == numVerVirBndry && clipRight) || (j < numVerVirBndry ) || (xEnd == pcv.lumaWidth); + const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); + const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); + PelUnitBuf buf = m_tempBuf2.subBuf(UnitArea(cs.area.chromaFormat, Area(0, 0, wBuf, hBuf))); + buf.copyFrom(recExtBuf.subBuf(UnitArea(cs.area.chromaFormat, Area(xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf)))); + // pad top-left unavailable samples for raster slice + if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 ); + } + + // pad bottom-right unavailable samples for raster slice + if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) ) + { + buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 ); + } + buf.extendBorderPel(MAX_ALF_PADDING_SIZE); + buf = buf.subBuf(UnitArea(cs.area.chromaFormat, Area(clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 
0 : MAX_ALF_PADDING_SIZE, w, h))); + + if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx]) + { + const Area blkSrc(0, 0, w, h); + const Area blkDst(xStart, yStart, w, h); + short filterSetIndex = alfCtuFilterIndex[ctuIdx]; + short *coeff; + short *clip; + if (filterSetIndex >= NUM_FIXED_FILTER_SETS) + { + coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + } + else + { + coeff = m_fixedFilterSetCoeffDec[filterSetIndex]; + clip = m_clipDefault; + } + m_filter7x7Blk(m_classifier, recBuf, buf, blkDst, blkSrc, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs + , m_alfVBLumaCTUHeight + , m_alfVBLumaPos + ); + } + + for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++) + { + ComponentID compID = ComponentID(compIdx); + const int chromaScaleX = getComponentScaleX(compID, recBuf.chromaFormat); + const int chromaScaleY = getComponentScaleY(compID, recBuf.chromaFormat); + if (m_ctuEnableFlag[compIdx][ctuIdx]) + { + const Area blkSrc(0, 0, w >> chromaScaleX, h >> chromaScaleY); + const Area blkDst(xStart >> chromaScaleX, yStart >> chromaScaleY, w >> chromaScaleX, h >> chromaScaleY); + const int alt_num = m_ctuAlternative[compID][ctuIdx]; + m_filter5x5Blk(m_classifier, recBuf, buf, blkDst, blkSrc, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs + , m_alfVBChmaCTUHeight + , m_alfVBChmaPos + ); + } + } + + xStart = xEnd; + } + + yStart = yEnd; + } + } + else + { + + const UnitArea area(cs.area.chromaFormat, Area(xPos, yPos, width, height)); + if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx]) + { + Area blk(xPos, yPos, width, height); + short filterSetIndex = alfCtuFilterIndex[ctuIdx]; + short *coeff; + short *clip; + if (filterSetIndex >= NUM_FIXED_FILTER_SETS) + { + coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS]; + } + else + { + coeff = 
m_fixedFilterSetCoeffDec[filterSetIndex]; + clip = m_clipDefault; + } + m_filter7x7Blk(m_classifier, recBuf, recExtBuf, blk, blk, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs + , m_alfVBLumaCTUHeight + , m_alfVBLumaPos + ); + } + + for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++) + { + ComponentID compID = ComponentID(compIdx); + const int chromaScaleX = getComponentScaleX(compID, recBuf.chromaFormat); + const int chromaScaleY = getComponentScaleY(compID, recBuf.chromaFormat); + if (m_ctuEnableFlag[compIdx][ctuIdx]) + { + Area blk(xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY); + const int alt_num = m_ctuAlternative[compID][ctuIdx]; + m_filter5x5Blk(m_classifier, recBuf, recExtBuf, blk, blk, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs + , m_alfVBChmaCTUHeight + , m_alfVBChmaPos + ); + } + } } + ctuIdx++; } } } -void EncAdaptiveLoopFilter::copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel ) +void EncAdaptiveLoopFilter::copyCtuAlternativeChroma( uint8_t* ctuAltsDst[MAX_NUM_COMPONENT], uint8_t* ctuAltsSrc[MAX_NUM_COMPONENT] ) { - if( isLuma( channel ) ) - { - memcpy( ctuFlagsDst[COMPONENT_Y], ctuFlagsSrc[COMPONENT_Y], sizeof( uint8_t ) * m_numCTUsInPic ); - } - else - { - memcpy( ctuFlagsDst[COMPONENT_Cb], ctuFlagsSrc[COMPONENT_Cb], sizeof( uint8_t ) * m_numCTUsInPic ); - memcpy( ctuFlagsDst[COMPONENT_Cr], ctuFlagsSrc[COMPONENT_Cr], sizeof( uint8_t ) * m_numCTUsInPic ); - } + std::copy_n( ctuAltsSrc[COMPONENT_Cb], m_numCTUsInPic, ctuAltsDst[COMPONENT_Cb] ); + std::copy_n( ctuAltsSrc[COMPONENT_Cr], m_numCTUsInPic, ctuAltsDst[COMPONENT_Cr] ); } -void EncAdaptiveLoopFilter::setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val ) +void EncAdaptiveLoopFilter::setCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT], uint8_t val ) { - if( isLuma( channel ) ) + std::fill_n( ctuAlts[COMPONENT_Cb], 
m_numCTUsInPic, val ); + std::fill_n( ctuAlts[COMPONENT_Cr], m_numCTUsInPic, val ); +} + +void EncAdaptiveLoopFilter::initCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT] ) +{ + uint8_t altIdx = 0; + for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ++ctuIdx ) { - memset( ctuFlags[COMPONENT_Y], val, sizeof( uint8_t ) * m_numCTUsInPic ); + ctuAlts[COMPONENT_Cb][ctuIdx] = altIdx; + if( (ctuIdx+1) * m_alfParamTemp.numAlternativesChroma >= (altIdx+1)*m_numCTUsInPic ) + ++altIdx; } - else + altIdx = 0; + for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ++ctuIdx ) { - memset( ctuFlags[COMPONENT_Cb], val, sizeof( uint8_t ) * m_numCTUsInPic ); - memset( ctuFlags[COMPONENT_Cr], val, sizeof( uint8_t ) * m_numCTUsInPic ); + ctuAlts[COMPONENT_Cr][ctuIdx] = altIdx; + if( (ctuIdx+1) * m_alfParamTemp.numAlternativesChroma >= (altIdx+1)*m_numCTUsInPic ) + ++altIdx; } } +int EncAdaptiveLoopFilter::getMaxNumAlternativesChroma( ) +{ + return std::min<int>( m_numCTUsInPic * 2, m_encCfg->getMaxNumAlfAlternativesChroma() ); +} diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h index d2b02d902520a609957a6f9f0a30f953093df072..d2ceb026fe13f0c41ea4a5ec403b568965120afb 100644 --- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h +++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,62 +41,52 @@ #include "CommonLib/AdaptiveLoopFilter.h" #include "CABACWriter.h" +#include "EncCfg.h" struct AlfCovariance { + static constexpr int MaxAlfNumClippingValues = AdaptiveLoopFilter::MaxAlfNumClippingValues; + using TE = double[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF]; + using Ty = double[MAX_NUM_ALF_LUMA_COEFF]; + using TKE = TE[AdaptiveLoopFilter::MaxAlfNumClippingValues][AdaptiveLoopFilter::MaxAlfNumClippingValues]; + using TKy = Ty[AdaptiveLoopFilter::MaxAlfNumClippingValues]; + int numCoeff; - double *y; - double **E; + int numBins; + TKy y; + TKE E; double pixAcc; AlfCovariance() {} ~AlfCovariance() {} - void create( int size ) + void create( int size, int num_bins = MaxAlfNumClippingValues ) { numCoeff = size; - - y = new double[numCoeff]; - E = new double*[numCoeff]; - - for( int i = 0; i < numCoeff; i++ ) - { - E[i] = new double[numCoeff]; - } + numBins = num_bins; + std::memset( y, 0, sizeof( y ) ); + std::memset( E, 0, sizeof( E ) ); } void destroy() { - for( int i = 0; i < numCoeff; i++ ) - { - delete[] E[i]; - E[i] = nullptr; - } - - delete[] E; - E = nullptr; - - delete[] y; - y = nullptr; } - void reset() + void reset( int num_bins = -1 ) { + if ( num_bins > 0 ) + numBins = num_bins; pixAcc = 0; - std::memset( y, 0, sizeof( *y ) * numCoeff ); - for( int i = 0; i < numCoeff; i++ ) - { - std::memset( E[i], 0, sizeof( *E[i] ) * numCoeff ); - } + std::memset( y, 0, sizeof( y ) ); + std::memset( E, 0, sizeof( E ) ); } const AlfCovariance& operator=( const AlfCovariance& src ) { - for( int i = 0; i < numCoeff; i++ ) - { - std::memcpy( E[i], src.E[i], sizeof( *E[i] ) * numCoeff ); - } - std::memcpy( y, src.y, sizeof( *y ) * numCoeff ); + numCoeff = src.numCoeff; + numBins = src.numBins; + std::memcpy( E, src.E, sizeof( E ) ); + std::memcpy( y, src.y, sizeof( y ) ); pixAcc = src.pixAcc; return *this; @@ -104,26 +94,52 @@ struct AlfCovariance void add( const 
AlfCovariance& lhs, const AlfCovariance& rhs ) { - for( int j = 0; j < numCoeff; j++ ) + numCoeff = lhs.numCoeff; + numBins = lhs.numBins; + for( int b0 = 0; b0 < numBins; b0++ ) { - for( int i = 0; i < numCoeff; i++ ) + for( int b1 = 0; b1 < numBins; b1++ ) { - E[j][i] = lhs.E[j][i] + rhs.E[j][i]; + for( int j = 0; j < numCoeff; j++ ) + { + for( int i = 0; i < numCoeff; i++ ) + { + E[b0][b1][j][i] = lhs.E[b0][b1][j][i] + rhs.E[b0][b1][j][i]; + } + } + } + } + for( int b = 0; b < numBins; b++ ) + { + for( int j = 0; j < numCoeff; j++ ) + { + y[b][j] = lhs.y[b][j] + rhs.y[b][j]; } - y[j] = lhs.y[j] + rhs.y[j]; } pixAcc = lhs.pixAcc + rhs.pixAcc; } const AlfCovariance& operator+= ( const AlfCovariance& src ) { - for( int j = 0; j < numCoeff; j++ ) + for( int b0 = 0; b0 < numBins; b0++ ) { - for( int i = 0; i < numCoeff; i++ ) + for( int b1 = 0; b1 < numBins; b1++ ) { - E[j][i] += src.E[j][i]; + for( int j = 0; j < numCoeff; j++ ) + { + for( int i = 0; i < numCoeff; i++ ) + { + E[b0][b1][j][i] += src.E[b0][b1][j][i]; + } + } + } + } + for( int b = 0; b < numBins; b++ ) + { + for( int j = 0; j < numCoeff; j++ ) + { + y[b][j] += src.y[b][j]; } - y[j] += src.y[j]; } pixAcc += src.pixAcc; @@ -132,88 +148,157 @@ struct AlfCovariance const AlfCovariance& operator-= ( const AlfCovariance& src ) { - for( int j = 0; j < numCoeff; j++ ) + for( int b0 = 0; b0 < numBins; b0++ ) + { + for( int b1 = 0; b1 < numBins; b1++ ) + { + for( int j = 0; j < numCoeff; j++ ) + { + for( int i = 0; i < numCoeff; i++ ) + { + E[b0][b1][j][i] -= src.E[b0][b1][j][i]; + } + } + } + } + for( int b = 0; b < numBins; b++ ) { - for( int i = 0; i < numCoeff; i++ ) + for( int j = 0; j < numCoeff; j++ ) { - E[j][i] -= src.E[j][i]; + y[b][j] -= src.y[b][j]; } - y[j] -= src.y[j]; } pixAcc -= src.pixAcc; return *this; } + + void setEyFromClip(const int* clip, TE _E, Ty _y, int size) const + { + for (int k=0; k<size; k++) + { + _y[k] = y[clip[k]][k]; + for (int l=0; l<size; l++) + { + _E[k][l] = 
E[clip[k]][clip[l]][k][l]; + } + } + } + + double optimizeFilter(const int* clip, double *f, int size) const + { + gnsSolveByChol( clip, f, size ); + return calculateError( clip, f ); + } + + double optimizeFilter(const AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const; + double optimizeFilterClip(const AlfFilterShape& alfShape, int* clip) const + { + Ty f; + return optimizeFilter(alfShape, clip, f, true); + } + + double calculateError( const int *clip ) const; + double calculateError( const int *clip, const double *coeff ) const { return calculateError(clip, coeff, numCoeff); } + double calculateError( const int *clip, const double *coeff, const int numCoeff ) const; + double calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const; + + void getClipMax(const AlfFilterShape& alfShape, int *clip_max) const; + void reduceClipCost(const AlfFilterShape& alfShape, int *clip) const; + +private: + // Cholesky decomposition + + int gnsSolveByChol( const int *clip, double *x, int numEq ) const; + int gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const; + void gnsBacksubstitution( TE R, double* z, int size, double* A ) const; + void gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const; + int gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const; }; class EncAdaptiveLoopFilter : public AdaptiveLoopFilter { public: - static constexpr int m_MAX_SCAN_VAL = 11; - static constexpr int m_MAX_EXP_GOLOMB = 16; - int m_alfWSSD; inline void setAlfWSSD(int alfWSSD) { m_alfWSSD = alfWSSD; } static std::vector<double> m_lumaLevelToWeightPLUT; inline std::vector<double>& getLumaLevelWeightTable() { return m_lumaLevelToWeightPLUT; } private: + int m_alfWSSD; + const EncCfg* m_encCfg; AlfCovariance*** m_alfCovariance[MAX_NUM_COMPONENT]; // [compIdx][shapeIdx][ctbAddr][classIdx] - AlfCovariance** m_alfCovarianceFrame[MAX_NUM_CHANNEL_TYPE]; // [CHANNEL][shapeIdx][classIdx] - uint8_t* 
m_ctuEnableFlagTmp[MAX_NUM_COMPONENT]; + AlfCovariance** m_alfCovarianceFrame[MAX_NUM_CHANNEL_TYPE]; // [CHANNEL][shapeIdx][lumaClassIdx/chromaAltIdx] + uint8_t* m_ctuEnableFlagTmp[MAX_NUM_COMPONENT]; + uint8_t* m_ctuEnableFlagTmp2[MAX_NUM_COMPONENT]; + uint8_t* m_ctuAlternativeTmp[MAX_NUM_COMPONENT]; //for RDO - AlfSliceParam m_alfSliceParamTemp; - AlfCovariance m_alfCovarianceMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES + 1]; + AlfParam m_alfParamTemp; + ParameterSetMap<APS>* m_apsMap; + AlfCovariance m_alfCovarianceMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES + 2]; + int m_alfClipMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF]; CABACWriter* m_CABACEstimator; CtxCache* m_CtxCache; double m_lambda[MAX_NUM_COMPONENT]; - const double FracBitsScale = 1.0 / double( 1 << SCALE_BITS ); - int* m_filterCoeffQuant; - int** m_filterCoeffSet; + int** m_filterCoeffSet; // [lumaClassIdx/chromaAltIdx][coeffIdx] + int** m_filterClippSet; // [lumaClassIdx/chromaAltIdx][coeffIdx] int** m_diffFilterCoeff; - int m_kMinTab[MAX_NUM_ALF_LUMA_COEFF]; - int m_bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB]; short m_filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES]; + unsigned m_bitsNewFilter[MAX_NUM_CHANNEL_TYPE]; + int& m_apsIdStart; + double *m_ctbDistortionFixedFilter; + double *m_ctbDistortionUnfilter[MAX_NUM_COMPONENT]; + std::vector<short> m_alfCtbFilterSetIndexTmp; + AlfParam m_alfParamTempNL; + int m_clipDefaultEnc[MAX_NUM_ALF_LUMA_COEFF]; + int m_filterTmp[MAX_NUM_ALF_LUMA_COEFF]; + int m_clipTmp[MAX_NUM_ALF_LUMA_COEFF]; public: - EncAdaptiveLoopFilter(); + EncAdaptiveLoopFilter( int& apsIdStart ); virtual ~EncAdaptiveLoopFilter() {} - - void ALFProcess( CodingStructure& cs, const double *lambdas, + void initDistortion(); + std::vector<int> getAvaiApsIdsLuma(CodingStructure& cs, int &newApsId); + void alfEncoderCtb(CodingStructure& cs, AlfParam& alfParamNewFilters #if ENABLE_QPA - const double 
lambdaChromaWeight, + , const double lambdaChromaWeight #endif - AlfSliceParam& alfSliceParam ); - void initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice ); - void create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ); + ); + void alfReconstructor(CodingStructure& cs, const PelUnitBuf& recExtBuf); + void ALFProcess(CodingStructure& cs, const double *lambdas +#if ENABLE_QPA + , const double lambdaChromaWeight +#endif + ); + void initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice, ParameterSetMap<APS>* apsMap ); + void create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ); void destroy(); - static int lengthGolomb( int coeffVal, int k ); - static int getGolombKMin( AlfFilterShape& alfShape, const int numFilters, int kMinTab[MAX_NUM_ALF_LUMA_COEFF], int bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB] ); + static int lengthGolomb( int coeffVal, int k, bool signed_coeff=true ); + void setApsIdStart( int i) { m_apsIdStart = i; } private: - void alfEncoder( CodingStructure& cs, AlfSliceParam& alfSliceParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel + void alfEncoder( CodingStructure& cs, AlfParam& alfParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel #if ENABLE_QPA , const double lambdaChromaWeight = 0.0 #endif ); - void copyAlfSliceParam( AlfSliceParam& alfSliceParamDst, AlfSliceParam& alfSliceParamSrc, ChannelType channel ); - double mergeFiltersAndCost( 
AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits ); + void copyAlfParam( AlfParam& alfParamDst, AlfParam& alfParamSrc, ChannelType channel ); + double mergeFiltersAndCost( AlfParam& alfParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits ); void getFrameStats( ChannelType channel, int iShapeIdx ); - void getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, const int numClasses ); - void deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv ); - void getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area ); - void calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx ); - void mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] ); - - double calculateError( AlfCovariance& cov ); - double calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth ); - double getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits ); - double deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] ); - int deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters, int& predMode ); - double deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool 
bChroma = false ); + void getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, uint8_t* ctbAltIdx, const int numClasses, int altIdx ); + void deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv, CodingStructure& cs ); + void getBlkStats(AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& areaDst, const CompArea& area, const ChannelType channel, int vbCTUHeight, int vbPos); + void calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance); + void mergeClasses(const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES]); + + + double getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits, bool onlyFilterCost = false ); + double deriveFilterCoeffs(AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], AlfParam& alfParam); + int deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters ); + double deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip ); double deriveCtbAlfEnableFlags( CodingStructure& cs, const int iShapeIdx, ChannelType channel, #if ENABLE_QPA const double chromaWeight, @@ -221,32 +306,29 @@ private: const int numClasses, const int 
numCoeff, double& distUnfilter ); void roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor ); - double getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, const int numFilters ); - int lengthTruncatedUnary( int symbol, int maxSymbol ); + double getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, int zeroBitsVarBin, const int numFilters); int lengthUvlc( int uiCode ); - int getNonFilterCoeffRate( AlfSliceParam& alfSliceParam ); - int getTBlength( int uiSymbol, const int uiMaxSymbol ); + int getNonFilterCoeffRate( AlfParam& alfParam ); int getCostFilterCoeffForce0( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters, bool* codedVarBins ); int getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters ); - int lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab ); + int getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters ); + int lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff ); double getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins ); - int getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma ); + int getChromaCoeffRate( AlfParam& alfParam, int altIdx ); double getUnfilteredDistortion( AlfCovariance* cov, ChannelType channel ); double getUnfilteredDistortion( AlfCovariance* cov, const int numClasses ); double getFilteredDistortion( AlfCovariance* cov, const int numClasses, const int numFiltersMinus1, const int numCoeff ); - // Cholesky decomposition - int gnsSolveByChol( double **LHS, double *rhs, double *x, int numEq ); - void gnsBacksubstitution( double R[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* z, int size, double* A ); - 
void gnsTransposeBacksubstitution( double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* rhs, double* x, int order ); - int gnsCholeskyDec( double **inpMatr, double outMatr[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], int numEq ); - - void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val ); - void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags ); + void setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, bool val ); + void setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags ); void setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val ); void copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel ); + void initCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT] ); + void setCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT], uint8_t val ); + void copyCtuAlternativeChroma( uint8_t* ctuAltsDst[MAX_NUM_COMPONENT], uint8_t* ctuAltsSrc[MAX_NUM_COMPONENT] ); + int getMaxNumAlternativesChroma( ); }; diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 41d8a29e4af1b8f928b33957767b91d53da9c68e..f27bb3c1001942819569517a46d851507e6be885 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -47,6 +47,10 @@ #include "CommonLib/Unit.h" +#if JVET_O0756_CALCULATE_HDRMETRICS +#include "HDRLib/inc/DistortionMetric.H" +#endif + struct GOPEntry { int m_POC; @@ -64,16 +68,15 @@ struct GOPEntry int m_betaOffsetDiv2; int m_temporalId; bool m_refPic; - int m_numRefPicsActive; int8_t m_sliceType; - int m_numRefPics; - int m_referencePics[MAX_NUM_REF_PICS]; - int m_usedByCurrPic[MAX_NUM_REF_PICS]; - int m_interRPSPrediction; - int m_deltaRPS; - int m_numRefIdc; - int m_refIdc[MAX_NUM_REF_PICS+1]; + int m_numRefPicsActive0; + int m_numRefPics0; + int m_deltaRefPics0[MAX_NUM_REF_PICS]; + int m_numRefPicsActive1; + int m_numRefPics1; + int m_deltaRefPics1[MAX_NUM_REF_PICS]; bool m_isEncoded; + bool m_ltrp_in_slice_header_flag; GOPEntry() : m_POC(-1) , m_QPOffset(0) @@ -90,21 +93,47 @@ struct GOPEntry , m_betaOffsetDiv2(0) , m_temporalId(0) , m_refPic(false) - , m_numRefPicsActive(0) , m_sliceType('P') - , m_numRefPics(0) - , m_interRPSPrediction(false) - , m_deltaRPS(0) - , m_numRefIdc(0) - , m_isEncoded(false) + , m_numRefPicsActive0(0) + , m_numRefPics0(0) + , m_numRefPicsActive1(0) + , m_numRefPics1(0) + , m_isEncoded(false) + , m_ltrp_in_slice_header_flag(false) { - ::memset( m_referencePics, 0, sizeof(m_referencePics) ); - ::memset( m_usedByCurrPic, 0, sizeof(m_usedByCurrPic) ); - ::memset( m_refIdc, 0, sizeof(m_refIdc) ); + ::memset(m_deltaRefPics0, 0, sizeof(m_deltaRefPics0)); + ::memset(m_deltaRefPics1, 0, sizeof(m_deltaRefPics1)); + } +}; + +struct RPLEntry +{ + int m_POC; + int m_temporalId; + bool m_refPic; + int m_numRefPicsActive; + int8_t m_sliceType; + int m_numRefPics; + int m_deltaRefPics[MAX_NUM_REF_PICS]; + bool m_isEncoded; + bool m_ltrp_in_slice_header_flag; + RPLEntry() + : m_POC(-1) + , m_temporalId(0) + , m_refPic(false) + , m_numRefPicsActive(0) + , m_sliceType('P') + , m_numRefPics(0) + , m_isEncoded(false) + , m_ltrp_in_slice_header_flag(false) + { + 
::memset(m_deltaRefPics, 0, sizeof(m_deltaRefPics)); } }; std::istringstream &operator>>(std::istringstream &in, GOPEntry &entry); //input + + //! \ingroup EncoderLib //! \{ @@ -139,53 +168,69 @@ protected: uint32_t m_maxChromaFormatConstraintIdc; bool m_bFrameConstraintFlag; bool m_bNoQtbttDualTreeIntraConstraintFlag; + bool m_noPartitionConstraintsOverrideConstraintFlag; bool m_bNoSaoConstraintFlag; bool m_bNoAlfConstraintFlag; - bool m_bNoPcmConstraintFlag; bool m_bNoRefWraparoundConstraintFlag; bool m_bNoTemporalMvpConstraintFlag; bool m_bNoSbtmvpConstraintFlag; bool m_bNoAmvrConstraintFlag; bool m_bNoBdofConstraintFlag; + bool m_noDmvrConstraintFlag; bool m_bNoCclmConstraintFlag; bool m_bNoMtsConstraintFlag; + bool m_noSbtConstraintFlag; bool m_bNoAffineMotionConstraintFlag; - bool m_bNoGbiConstraintFlag; - bool m_bNoMhIntraConstraintFlag; + bool m_bNoBcwConstraintFlag; + bool m_noIbcConstraintFlag; + bool m_bNoCiipConstraintFlag; + bool m_noFPelMmvdConstraintFlag; bool m_bNoTriangleConstraintFlag; bool m_bNoLadfConstraintFlag; - bool m_bNoCurrPicRefConstraintFlag; + bool m_noTransformSkipConstraintFlag; + bool m_noBDPCMConstraintFlag; + bool m_noJointCbCrConstraintFlag; bool m_bNoQpDeltaConstraintFlag; bool m_bNoDepQuantConstraintFlag; bool m_bNoSignDataHidingConstraintFlag; + bool m_noTrailConstraintFlag; + bool m_noStsaConstraintFlag; + bool m_noRaslConstraintFlag; + bool m_noRadlConstraintFlag; + bool m_noIdrConstraintFlag; + bool m_noCraConstraintFlag; + bool m_noGdrConstraintFlag; + bool m_noApsConstraintFlag; /* profile & level */ Profile::Name m_profile; Level::Tier m_levelTier; Level::Name m_level; + std::vector<uint32_t> m_subProfile; + uint8_t m_numSubProfile; bool m_progressiveSourceFlag; bool m_interlacedSourceFlag; bool m_nonPackedConstraintFlag; bool m_frameOnlyConstraintFlag; - uint32_t m_bitDepthConstraintValue; - ChromaFormat m_chromaFormatConstraintValue; - bool m_intraConstraintFlag; - bool m_onePictureOnlyConstraintFlag; - bool 
m_lowerBitRateConstraintFlag; + bool m_intraConstraintFlag; //====== Coding Structure ======== - uint32_t m_uiIntraPeriod; // TODO: make this an int - it can be -1! + int m_uiIntraPeriod; // needs to be signed to allow '-1' for no intra period uint32_t m_uiDecodingRefreshType; ///< the type of decoding refresh employed for the random access. -#if JCTVC_Y0038_PARAMS bool m_rewriteParamSets; -#endif + bool m_idrRefParamList; int m_iGOPSize; + RPLEntry m_RPLList0[MAX_GOP]; + RPLEntry m_RPLList1[MAX_GOP]; + int m_numRPLList0; + int m_numRPLList1; GOPEntry m_GOPList[MAX_GOP]; - int m_extraRPSs; int m_maxDecPicBuffering[MAX_TLAYER]; int m_numReorderPics[MAX_TLAYER]; + int m_drapPeriod; int m_iQP; // if (AdaptiveQP == OFF) + ChromaQpMappingTableParams m_chromaQpMappingTableParams; #if X0038_LAMBDA_FROM_QP_CAPABILITY int m_intraQPOffset; ///< QP offset for intra slice (integer) int m_lambdaFromQPEnable; ///< enable lambda derivation from QP @@ -199,11 +244,23 @@ protected: int m_maxTempLayer; ///< Max temporal layer unsigned m_CTUSize; + bool m_subPicPresentFlag; + unsigned m_numSubPics; + uint32_t m_subPicCtuTopLeftX[MAX_NUM_SUB_PICS]; + uint32_t m_subPicCtuTopLeftY[MAX_NUM_SUB_PICS]; + uint32_t m_subPicWidth[MAX_NUM_SUB_PICS]; + uint32_t m_subPicHeight[MAX_NUM_SUB_PICS]; + uint32_t m_subPicTreatedAsPicFlag[MAX_NUM_SUB_PICS]; + uint32_t m_loopFilterAcrossSubpicEnabledFlag[MAX_NUM_SUB_PICS]; + bool m_subPicIdPresentFlag; + bool m_subPicIdSignallingPresentFlag; + unsigned m_subPicIdLen; + uint32_t m_subPicId[MAX_NUM_SUB_PICS]; bool m_useSplitConsOverride; unsigned m_uiMinQT[3]; //0: I slice; 1: P/B slice, 2: I slice chroma - unsigned m_uiMaxBTDepth; - unsigned m_uiMaxBTDepthI; - unsigned m_uiMaxBTDepthIChroma; + unsigned m_uiMaxMTTHierarchyDepth; + unsigned m_uiMaxMTTHierarchyDepthI; + unsigned m_uiMaxMTTHierarchyDepthIChroma; bool m_dualITree; unsigned m_maxCUWidth; unsigned m_maxCUHeight; @@ -211,21 +268,28 @@ protected: unsigned m_log2DiffMaxMinCodingBlockSize; int 
m_LMChroma; - bool m_cclmCollocatedChromaFlag; + bool m_horCollocatedChromaFlag; + bool m_verCollocatedChromaFlag; int m_IntraMTS; int m_InterMTS; - int m_IntraMTSMaxCand; - int m_InterMTSMaxCand; + int m_MTSIntraMaxCand; + int m_MTSInterMaxCand; int m_ImplicitMTS; bool m_SBT; ///< Sub-Block Transform for inter blocks + int m_SBTFast64WidthTh; ///< Enable size-64 SBT in encoder RDO check for HD and above sequences + + bool m_LFNST; + bool m_useFastLFNST; int m_SubPuMvpMode; bool m_Affine; bool m_AffineType; + bool m_PROF; bool m_BIO; + bool m_SMVD; bool m_compositeRefEnabled; //composite reference - bool m_GBi; - bool m_GBiFast; + bool m_bcw; + bool m_BcwFast; #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET bool m_LadfEnabled; int m_LadfNumIntervals; @@ -233,13 +297,19 @@ protected: int m_LadfIntervalLowerBound[MAX_LADF_INTERVALS]; #endif - bool m_MHIntra; + bool m_ciip; bool m_Triangle; bool m_allowDisFracMMVD; bool m_AffineAmvr; bool m_HashME; bool m_AffineAmvrEncOpt; bool m_DMVR; + bool m_MMVD; + int m_MmvdDisNum; + bool m_rgbFormat; + bool m_useColorTrans; + unsigned m_PLTMode; + bool m_JointCbCrMode; unsigned m_IBCMode; unsigned m_IBCLocalSearchRangeX; unsigned m_IBCLocalSearchRangeY; @@ -252,10 +322,16 @@ protected: unsigned m_wrapAroundOffset; // ADD_NEW_TOOL : (encoder lib) add tool enabling flags and associated parameters here - bool m_lumaReshapeEnable; + bool m_loopFilterAcrossVirtualBoundariesDisabledFlag; + unsigned m_numVerVirtualBoundaries; + unsigned m_numHorVirtualBoundaries; + unsigned m_virtualBoundariesPosX[3]; + unsigned m_virtualBoundariesPosY[3]; + bool m_lmcsEnabled; unsigned m_reshapeSignalType; unsigned m_intraCMD; ReshapeCW m_reshapeCW; + int m_CSoffset; bool m_encDbOpt; bool m_useFastLCTU; bool m_useFastMrg; @@ -263,10 +339,14 @@ protected: bool m_useAMaxBT; bool m_e0023FastEnc; bool m_contentBasedFastQtbt; - -#if MAX_TB_SIZE_SIGNALLING + bool m_useNonLinearAlfLuma; + bool m_useNonLinearAlfChroma; + unsigned m_maxNumAlfAlternativesChroma; 
+ bool m_MRL; + bool m_MIP; + bool m_useFastMIP; + int m_fastLocalDualTreeMode; uint32_t m_log2MaxTbSize; -#endif //====== Loop/Deblock Filter ======== bool m_bLoopFilterDisable; @@ -285,9 +365,7 @@ protected: int m_maxNumOffsetsPerPic; bool m_saoCtuBoundary; -#if K0238_SAO_GREEDY_MERGE_ENCODING bool m_saoGreedyMergeEnc; -#endif //====== Motion search ======== bool m_bDisableIntraPUsInInterSlices; MESearchMethod m_motionEstimationSearchMethod; @@ -307,6 +385,8 @@ protected: int m_chromaCrQpOffset; // Chroma Cr Qp Offset (0:default) int m_chromaCbQpOffsetDualTree; // Chroma Cb QP Offset for dual tree int m_chromaCrQpOffsetDualTree; // Chroma Cr Qp Offset for dual tree + int m_chromaCbCrQpOffset; // QP Offset for the joint Cb-Cr mode + int m_chromaCbCrQpOffsetDualTree; // QP Offset for the joint Cb-Cr mode in dual tree #if ER_CHROMA_QP_WCG_PPS WCGChromaQPControl m_wcgChromaQpControl; ///< Wide-colour-gamut chroma QP control. #endif @@ -347,6 +427,8 @@ protected: uint32_t m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE]; bool m_useTransformSkip; bool m_useTransformSkipFast; + bool m_useChromaTS; + int m_useBDPCM; uint32_t m_log2MaxTransformSkipBlockSize; bool m_transformSkipRotationEnabledFlag; bool m_transformSkipContextEnabledFlag; @@ -359,122 +441,177 @@ protected: int* m_aidQP; uint32_t m_uiDeltaQpRD; bool m_bFastDeltaQP; + bool m_ISP; bool m_useFastISP; - bool m_bUseConstrainedIntraPred; bool m_bFastUDIUseMPMEnabled; bool m_bFastMEForGenBLowDelayEnabled; bool m_bUseBLambdaForNonKeyLowDelayPictures; - bool m_usePCM; - int m_PCMBitDepth[MAX_NUM_CHANNEL_TYPE]; - uint32_t m_pcmLog2MaxSize; - uint32_t m_uiPCMLog2MinSize; - //====== Slice ======== - SliceConstraint m_sliceMode; - int m_sliceArgument; - //====== Dependent Slice ======== - SliceConstraint m_sliceSegmentMode; - int m_sliceSegmentArgument; - bool m_bLFCrossSliceBoundaryFlag; - - bool m_bPCMInputBitDepthFlag; - bool m_bPCMFilterDisableFlag; - bool m_intraSmoothingDisabledFlag; -#if HEVC_TILES_WPP - bool 
m_loopFilterAcrossTilesEnabledFlag; - bool m_tileUniformSpacingFlag; - int m_iNumColumnsMinus1; - int m_iNumRowsMinus1; - std::vector<int> m_tileColumnWidth; - std::vector<int> m_tileRowHeight; + bool m_gopBasedTemporalFilterEnabled; + bool m_noPicPartitionFlag; ///< no picture partitioning flag (single tile, single slice) + std::vector<uint32_t> m_tileColumnWidth; ///< tile column widths in units of CTUs (last column width will be repeated uniformly to cover any remaining picture width) + std::vector<uint32_t> m_tileRowHeight; ///< tile row heights in units of CTUs (last row height will be repeated uniformly to cover any remaining picture height) + bool m_rectSliceFlag; ///< indicates if using rectangular or raster-scan slices + uint32_t m_numSlicesInPic; ///< number of rectangular slices in the picture (raster-scan slice specified at slice level) + bool m_tileIdxDeltaPresentFlag; ///< rectangular slice tile index delta present flag + std::vector<RectSlice> m_rectSlices; ///< list of rectanglar slice syntax parameters + std::vector<uint32_t> m_rasterSliceSize; ///< raster-scan slice sizes in units of tiles + bool m_bLFCrossTileBoundaryFlag; ///< 1: filter across tile boundaries 0: do not filter across tile boundaries + bool m_bLFCrossSliceBoundaryFlag; ///< 1: filter across slice boundaries 0: do not filter across slice boundaries + bool m_intraSmoothingDisabledFlag; + //====== Sub-picture and Slices ======== + bool m_singleSlicePerSubPicFlag; bool m_entropyCodingSyncEnabledFlag; -#endif + HashType m_decodedPictureHashSEIType; bool m_bufferingPeriodSEIEnabled; bool m_pictureTimingSEIEnabled; - bool m_recoveryPointSEIEnabled; - bool m_toneMappingInfoSEIEnabled; - int m_toneMapId; - bool m_toneMapCancelFlag; - bool m_toneMapPersistenceFlag; - int m_codedDataBitDepth; - int m_targetBitDepth; - int m_modelId; - int m_minValue; - int m_maxValue; - int m_sigmoidMidpoint; - int m_sigmoidWidth; - int m_numPivots; - int m_cameraIsoSpeedIdc; - int m_cameraIsoSpeedValue; - 
int m_exposureIndexIdc; - int m_exposureIndexValue; - bool m_exposureCompensationValueSignFlag; - int m_exposureCompensationValueNumerator; - int m_exposureCompensationValueDenomIdc; - int m_refScreenLuminanceWhite; - int m_extendedRangeWhiteLevel; - int m_nominalBlackLevelLumaCodeValue; - int m_nominalWhiteLevelLumaCodeValue; - int m_extendedWhiteLevelLumaCodeValue; - int* m_startOfCodedInterval; - int* m_codedPivotValue; - int* m_targetPivotValue; + bool m_frameFieldInfoSEIEnabled; + bool m_dependentRAPIndicationSEIEnabled; bool m_framePackingSEIEnabled; int m_framePackingSEIType; int m_framePackingSEIId; int m_framePackingSEIQuincunx; int m_framePackingSEIInterpretation; - bool m_segmentedRectFramePackingSEIEnabled; - bool m_segmentedRectFramePackingSEICancel; - int m_segmentedRectFramePackingSEIType; - bool m_segmentedRectFramePackingSEIPersistence; - int m_displayOrientationSEIAngle; - bool m_temporalLevel0IndexSEIEnabled; - bool m_gradualDecodingRefreshInfoEnabled; - int m_noDisplaySEITLayer; + bool m_bpDeltasGOPStructure; bool m_decodingUnitInfoSEIEnabled; +#if HEVC_SEI bool m_SOPDescriptionSEIEnabled; bool m_scalableNestingSEIEnabled; bool m_tmctsSEIEnabled; +#endif + bool m_erpSEIEnabled; + bool m_erpSEICancelFlag; + bool m_erpSEIPersistenceFlag; + bool m_erpSEIGuardBandFlag; + uint32_t m_erpSEIGuardBandType; + uint32_t m_erpSEILeftGuardBandWidth; + uint32_t m_erpSEIRightGuardBandWidth; + bool m_sphereRotationSEIEnabled; + bool m_sphereRotationSEICancelFlag; + bool m_sphereRotationSEIPersistenceFlag; + int m_sphereRotationSEIYaw; + int m_sphereRotationSEIPitch; + int m_sphereRotationSEIRoll; + bool m_omniViewportSEIEnabled; + uint32_t m_omniViewportSEIId; + bool m_omniViewportSEICancelFlag; + bool m_omniViewportSEIPersistenceFlag; + uint32_t m_omniViewportSEICntMinus1; + std::vector<int> m_omniViewportSEIAzimuthCentre; + std::vector<int> m_omniViewportSEIElevationCentre; + std::vector<int> m_omniViewportSEITiltCentre; + std::vector<uint32_t> 
m_omniViewportSEIHorRange; + std::vector<uint32_t> m_omniViewportSEIVerRange; + bool m_rwpSEIEnabled; + bool m_rwpSEIRwpCancelFlag; + bool m_rwpSEIRwpPersistenceFlag; + bool m_rwpSEIConstituentPictureMatchingFlag; + int m_rwpSEINumPackedRegions; + int m_rwpSEIProjPictureWidth; + int m_rwpSEIProjPictureHeight; + int m_rwpSEIPackedPictureWidth; + int m_rwpSEIPackedPictureHeight; + std::vector<uint8_t> m_rwpSEIRwpTransformType; + std::vector<bool> m_rwpSEIRwpGuardBandFlag; + std::vector<uint32_t> m_rwpSEIProjRegionWidth; + std::vector<uint32_t> m_rwpSEIProjRegionHeight; + std::vector<uint32_t> m_rwpSEIRwpSEIProjRegionTop; + std::vector<uint32_t> m_rwpSEIProjRegionLeft; + std::vector<uint16_t> m_rwpSEIPackedRegionWidth; + std::vector<uint16_t> m_rwpSEIPackedRegionHeight; + std::vector<uint16_t> m_rwpSEIPackedRegionTop; + std::vector<uint16_t> m_rwpSEIPackedRegionLeft; + std::vector<uint8_t> m_rwpSEIRwpLeftGuardBandWidth; + std::vector<uint8_t> m_rwpSEIRwpRightGuardBandWidth; + std::vector<uint8_t> m_rwpSEIRwpTopGuardBandHeight; + std::vector<uint8_t> m_rwpSEIRwpBottomGuardBandHeight; + std::vector<bool> m_rwpSEIRwpGuardBandNotUsedForPredFlag; + std::vector<uint8_t> m_rwpSEIRwpGuardBandType; + bool m_gcmpSEIEnabled; + bool m_gcmpSEICancelFlag; + bool m_gcmpSEIPersistenceFlag; + uint8_t m_gcmpSEIPackingType; + uint8_t m_gcmpSEIMappingFunctionType; + std::vector<uint8_t> m_gcmpSEIFaceIndex; + std::vector<uint8_t> m_gcmpSEIFaceRotation; + std::vector<double> m_gcmpSEIFunctionCoeffU; + std::vector<bool> m_gcmpSEIFunctionUAffectedByVFlag; + std::vector<double> m_gcmpSEIFunctionCoeffV; + std::vector<bool> m_gcmpSEIFunctionVAffectedByUFlag; + bool m_gcmpSEIGuardBandFlag; + bool m_gcmpSEIGuardBandBoundaryType; + uint8_t m_gcmpSEIGuardBandSamplesMinus1; + bool m_subpicureLevelInfoSEIEnabled; + bool m_sampleAspectRatioInfoSEIEnabled; + bool m_sariCancelFlag; + bool m_sariPersistenceFlag; + int m_sariAspectRatioIdc; + int m_sariSarWidth; + int m_sariSarHeight; bool 
m_MCTSEncConstraint; - bool m_timeCodeSEIEnabled; - int m_timeCodeSEINumTs; - SEITimeSet m_timeSetArray[MAX_TIMECODE_SEI_SETS]; - bool m_kneeSEIEnabled; - int m_kneeSEIId; - bool m_kneeSEICancelFlag; - bool m_kneeSEIPersistenceFlag; - int m_kneeSEIInputDrange; - int m_kneeSEIInputDispLuminance; - int m_kneeSEIOutputDrange; - int m_kneeSEIOutputDispLuminance; - int m_kneeSEINumKneePointsMinus1; - int* m_kneeSEIInputKneePoint; - int* m_kneeSEIOutputKneePoint; - std::string m_colourRemapSEIFileRoot; ///< SEI Colour Remapping File (initialized from external file) SEIMasteringDisplay m_masteringDisplay; #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI bool m_alternativeTransferCharacteristicsSEIEnabled; uint8_t m_preferredTransferCharacteristics; #endif - bool m_greenMetadataInfoSEIEnabled; - uint8_t m_greenMetadataType; - uint8_t m_xsdMetricType; + // film grain characterstics sei + bool m_fgcSEIEnabled; + bool m_fgcSEICancelFlag; + bool m_fgcSEIPersistenceFlag; + uint8_t m_fgcSEIModelID; + bool m_fgcSEISepColourDescPresentFlag; + uint8_t m_fgcSEIBlendingModeID; + uint8_t m_fgcSEILog2ScaleFactor; + bool m_fgcSEICompModelPresent[MAX_NUM_COMPONENT]; +// cll SEI + bool m_cllSEIEnabled; + uint16_t m_cllSEIMaxContentLevel; + uint16_t m_cllSEIMaxPicAvgLevel; +// ave sei + bool m_aveSEIEnabled; + uint32_t m_aveSEIAmbientIlluminance; + uint16_t m_aveSEIAmbientLightX; + uint16_t m_aveSEIAmbientLightY; +// ccv sei + bool m_ccvSEIEnabled; + bool m_ccvSEICancelFlag; + bool m_ccvSEIPersistenceFlag; + bool m_ccvSEIPrimariesPresentFlag; + bool m_ccvSEIMinLuminanceValuePresentFlag; + bool m_ccvSEIMaxLuminanceValuePresentFlag; + bool m_ccvSEIAvgLuminanceValuePresentFlag; + double m_ccvSEIPrimariesX[MAX_NUM_COMPONENT]; + double m_ccvSEIPrimariesY[MAX_NUM_COMPONENT]; + double m_ccvSEIMinLuminanceValue; + double m_ccvSEIMaxLuminanceValue; + double m_ccvSEIAvgLuminanceValue; //====== Weighted Prediction ======== bool m_useWeightedPred; //< Use of Weighting Prediction (P_SLICE) bool 
m_useWeightedBiPred; //< Use of Bi-directional Weighting Prediction (B_SLICE) WeightedPredictionMethod m_weightedPredictionMethod; - uint32_t m_log2ParallelMergeLevelMinus2; ///< Parallel merge estimation region uint32_t m_maxNumMergeCand; ///< Maximum number of merge candidates uint32_t m_maxNumAffineMergeCand; ///< Maximum number of affine merge candidates -#if HEVC_USE_SCALING_LISTS + uint32_t m_maxNumTriangleCand; + uint32_t m_maxNumIBCMergeCand; ///< Max number of IBC merge candidates ScalingListMode m_useScalingListId; ///< Using quantization matrix i.e. 0=off, 1=default, 2=file. std::string m_scalingListFileName; ///< quantization matrix file name -#endif + bool m_sliceLevelRpl; ///< code reference picture lists in slice headers rather than picture header + bool m_sliceLevelDblk; ///< code deblocking filter parameters in slice headers rather than picture header + bool m_sliceLevelSao; ///< code SAO parameters in slice headers rather than picture header + bool m_sliceLevelAlf; ///< code ALF parameters in slice headers rather than picture header + bool m_disableScalingMatrixForLfnstBlks; int m_TMVPModeId; + bool m_constantSliceHeaderParamsEnabledFlag; + int m_PPSDepQuantEnabledIdc; + int m_PPSRefPicListSPSIdc0; + int m_PPSRefPicListSPSIdc1; + int m_PPSMvdL1ZeroIdc; + int m_PPSCollocatedFromL0Idc; + uint32_t m_PPSSixMinusMaxNumMergeCandPlus1; + uint32_t m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; bool m_DepQuantEnabledFlag; bool m_SignDataHidingEnabledFlag; bool m_RCEnableRateControl; @@ -489,29 +626,26 @@ protected: uint32_t m_RCCpbSize; double m_RCInitialCpbFullness; #endif - bool m_TransquantBypassEnabledFlag; ///< transquant_bypass_enabled_flag setting in PPS. - bool m_CUTransquantBypassFlagForce; ///< if transquant_bypass_enabled_flag, then, if true, all CU transquant bypass flags will be set to true. - CostMode m_costMode; ///< The cost function to use, primarily when considering lossless coding. 
-#if HEVC_VPS VPS m_cVPS; -#endif + DPS m_dps; + bool m_decodingParameterSetEnabled; ///< enable decoding parameter set bool m_recalculateQPAccordingToLambda; ///< recalculate QP value according to the lambda value +#if HEVC_SEI int m_activeParameterSetsSEIEnabled; ///< enable active parameter set SEI message +#endif + bool m_hrdParametersPresentFlag; ///< enable generation of HRD parameters bool m_vuiParametersPresentFlag; ///< enable generation of VUI parameters bool m_aspectRatioInfoPresentFlag; ///< Signals whether aspect_ratio_idc is present +#if HEVC_SEI bool m_chromaResamplingFilterHintEnabled; ///< Signals whether chroma sampling filter hint data is present int m_chromaResamplingHorFilterIdc; ///< Specifies the Index of filter to use int m_chromaResamplingVerFilterIdc; ///< Specifies the Index of filter to use +#endif int m_aspectRatioIdc; ///< aspect_ratio_idc int m_sarWidth; ///< horizontal size of the sample aspect ratio int m_sarHeight; ///< vertical size of the sample aspect ratio - bool m_overscanInfoPresentFlag; ///< Signals whether overscan_appropriate_flag is present - bool m_overscanAppropriateFlag; ///< Indicates whether conformant decoded pictures are suitable for display using overscan - bool m_videoSignalTypePresentFlag; ///< Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present - int m_videoFormat; ///< Indicates representation of pictures - bool m_videoFullRangeFlag; ///< Indicates the black level and range of luma and chroma signals bool m_colourDescriptionPresentFlag; ///< Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present int m_colourPrimaries; ///< Indicates chromaticity coordinates of the source primaries int m_transferCharacteristics; ///< Indicates the opto-electronic transfer characteristics of the source @@ -519,25 +653,11 @@ protected: bool m_chromaLocInfoPresentFlag; ///< Signals whether chroma_sample_loc_type_top_field and 
chroma_sample_loc_type_bottom_field are present int m_chromaSampleLocTypeTopField; ///< Specifies the location of chroma samples for top field int m_chromaSampleLocTypeBottomField; ///< Specifies the location of chroma samples for bottom field - bool m_neutralChromaIndicationFlag; ///< Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1) - Window m_defaultDisplayWindow; ///< Represents the default display window parameters - bool m_frameFieldInfoPresentFlag; ///< Indicates that pic_struct and other field coding related values are present in picture timing SEI messages - bool m_pocProportionalToTimingFlag; ///< Indicates that the POC value is proportional to the output time w.r.t. first picture in CVS - int m_numTicksPocDiffOneMinus1; ///< Number of ticks minus 1 that for a POC difference of one - bool m_bitstreamRestrictionFlag; ///< Signals whether bitstream restriction parameters are present -#if HEVC_TILES_WPP - bool m_tilesFixedStructureFlag; ///< Indicates that each active picture parameter set has the same values of the syntax elements related to tiles -#endif - bool m_motionVectorsOverPicBoundariesFlag; ///< Indicates that no samples outside the picture boundaries are used for inter prediction - int m_minSpatialSegmentationIdc; ///< Indicates the maximum size of the spatial segments in the pictures in the coded video sequence - int m_maxBytesPerPicDenom; ///< Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture - int m_maxBitsPerMinCuDenom; ///< Indicates an upper bound for the number of bits of coding_unit() data - int m_log2MaxMvLengthHorizontal; ///< Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units - int m_log2MaxMvLengthVertical; ///< Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units - -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - bool 
m_useStrongIntraSmoothing; ///< enable the use of strong intra smoothing (bi_linear interpolation) for 32x32 blocks when reference samples are flat. -#endif + int m_chromaSampleLocType; ///< Specifies the location of chroma samples for progressive content + bool m_overscanInfoPresentFlag; ///< Signals whether overscan_appropriate_flag is present + bool m_overscanAppropriateFlag; ///< Indicates whether conformant decoded pictures are suitable for display using overscan + bool m_videoFullRangeFlag; ///< Indicates the black level and range of luma and chroma signals + bool m_bEfficientFieldIRAPEnabled; ///< enable to code fields in a specific, potentially more efficient, order. bool m_bHarmonizeGopFirstFieldCoupleEnabled; @@ -561,23 +681,32 @@ protected: int m_numSplitThreads; bool m_forceSingleSplitThread; #endif -#if ENABLE_WPP_PARALLELISM - int m_numWppThreads; - int m_numWppExtraLines; - bool m_ensureWppBitEqual; -#endif bool m_alf; ///< Adaptive Loop Filter +#if JVET_O0756_CALCULATE_HDRMETRICS + double m_whitePointDeltaE[hdrtoolslib::NB_REF_WHITE]; + double m_maxSampleValue; + hdrtoolslib::SampleRange m_sampleRange; + hdrtoolslib::ColorPrimaries m_colorPrimaries; + bool m_enableTFunctionLUT; + hdrtoolslib::ChromaLocation m_chromaLocation[2]; + int m_chromaUPFilter; + int m_cropOffsetLeft; + int m_cropOffsetTop; + int m_cropOffsetRight; + int m_cropOffsetBottom; + bool m_calculateHdrMetrics; +#endif + double m_scalingRatioHor; + double m_scalingRatioVer; + bool m_rprEnabled; + int m_switchPocPeriod; + int m_upscaledOutput; + int m_numRefLayers[MAX_VPS_LAYERS]; public: EncCfg() - #if HEVC_TILES_WPP - : m_tileColumnWidth() - , m_tileRowHeight() -#endif { - m_PCMBitDepth[CHANNEL_TYPE_LUMA]=8; - m_PCMBitDepth[CHANNEL_TYPE_CHROMA]=8; } virtual ~EncCfg() @@ -585,7 +714,8 @@ public: void setProfile(Profile::Name profile) { m_profile = profile; } void setLevel(Level::Tier tier, Level::Name level) { m_levelTier = tier; m_level = level; } - + void setNumSubProfile( uint8_t 
numSubProfile) { m_numSubProfile = numSubProfile; m_subProfile.resize(m_numSubProfile); } + void setSubProfile( int i, uint32_t subProfile) { m_subProfile[i] = subProfile; } bool getIntraOnlyConstraintFlag() const { return m_bIntraOnlyConstraintFlag; } void setIntraOnlyConstraintFlag(bool bVal) { m_bIntraOnlyConstraintFlag = bVal; } uint32_t getMaxBitDepthConstraintIdc() const { return m_maxBitDepthConstraintIdc; } @@ -596,12 +726,12 @@ public: void setFrameConstraintFlag(bool bVal) { m_bFrameConstraintFlag = bVal; } bool getNoQtbttDualTreeIntraConstraintFlag() const { return m_bNoQtbttDualTreeIntraConstraintFlag; } void setNoQtbttDualTreeIntraConstraintFlag(bool bVal) { m_bNoQtbttDualTreeIntraConstraintFlag = bVal; } + bool getNoPartitionConstraintsOverrideConstraintFlag() const { return m_noPartitionConstraintsOverrideConstraintFlag; } + void setNoPartitionConstraintsOverrideConstraintFlag(bool bVal) { m_noPartitionConstraintsOverrideConstraintFlag = bVal; } bool getNoSaoConstraintFlag() const { return m_bNoSaoConstraintFlag; } void setNoSaoConstraintFlag(bool bVal) { m_bNoSaoConstraintFlag = bVal; } bool getNoAlfConstraintFlag() const { return m_bNoAlfConstraintFlag; } void setNoAlfConstraintFlag(bool bVal) { m_bNoAlfConstraintFlag = bVal; } - bool getNoPcmConstraintFlag() const { return m_bNoPcmConstraintFlag; } - void setNoPcmConstraintFlag(bool bVal) { m_bNoPcmConstraintFlag = bVal; } bool getNoRefWraparoundConstraintFlag() const { return m_bNoRefWraparoundConstraintFlag; } void setNoRefWraparoundConstraintFlag(bool bVal) { m_bNoRefWraparoundConstraintFlag = bVal; } bool getNoTemporalMvpConstraintFlag() const { return m_bNoTemporalMvpConstraintFlag; } @@ -612,28 +742,57 @@ public: void setNoAmvrConstraintFlag(bool bVal) { m_bNoAmvrConstraintFlag = bVal; } bool getNoBdofConstraintFlag() const { return m_bNoBdofConstraintFlag; } void setNoBdofConstraintFlag(bool bVal) { m_bNoBdofConstraintFlag = bVal; } + bool getNoDmvrConstraintFlag() const { return 
m_noDmvrConstraintFlag; } + void setNoDmvrConstraintFlag(bool bVal) { m_noDmvrConstraintFlag = bVal; } bool getNoCclmConstraintFlag() const { return m_bNoCclmConstraintFlag; } void setNoCclmConstraintFlag(bool bVal) { m_bNoCclmConstraintFlag = bVal; } bool getNoMtsConstraintFlag() const { return m_bNoMtsConstraintFlag; } void setNoMtsConstraintFlag(bool bVal) { m_bNoMtsConstraintFlag = bVal; } + bool getNoSbtConstraintFlag() const { return m_noSbtConstraintFlag; } + void setNoSbtConstraintFlag(bool bVal) { m_noSbtConstraintFlag = bVal; } bool getNoAffineMotionConstraintFlag() const { return m_bNoAffineMotionConstraintFlag; } void setNoAffineMotionConstraintFlag(bool bVal) { m_bNoAffineMotionConstraintFlag = bVal; } - bool getNoGbiConstraintFlag() const { return m_bNoGbiConstraintFlag; } - void setNoGbiConstraintFlag(bool bVal) { m_bNoGbiConstraintFlag = bVal; } - bool getNoMhIntraConstraintFlag() const { return m_bNoMhIntraConstraintFlag; } - void setNoMhIntraConstraintFlag(bool bVal) { m_bNoMhIntraConstraintFlag = bVal; } + bool getNoBcwConstraintFlag() const { return m_bNoBcwConstraintFlag; } + void setNoBcwConstraintFlag(bool bVal) { m_bNoBcwConstraintFlag = bVal; } + bool getNoIbcConstraintFlag() const { return m_noIbcConstraintFlag; } + void setNoIbcConstraintFlag(bool bVal) { m_noIbcConstraintFlag = bVal; } + bool getNoCiipConstraintFlag() const { return m_bNoCiipConstraintFlag; } + void setNoCiipConstraintFlag(bool bVal) { m_bNoCiipConstraintFlag = bVal; } + bool getNoFPelMmvdConstraintFlag() const { return m_noFPelMmvdConstraintFlag; } + void setNoFPelMmvdConstraintFlag(bool bVal) { m_noFPelMmvdConstraintFlag = bVal; } bool getNoTriangleConstraintFlag() const { return m_bNoTriangleConstraintFlag; } void setNoTriangleConstraintFlag(bool bVal) { m_bNoTriangleConstraintFlag = bVal; } bool getNoLadfConstraintFlag() const { return m_bNoLadfConstraintFlag; } void setNoLadfConstraintFlag(bool bVal) { m_bNoLadfConstraintFlag = bVal; } - bool 
getNoCurrPicRefConstraintFlag() const { return m_bNoCurrPicRefConstraintFlag; } - void setNoCurrPicRefConstraintFlag(bool bVal) { m_bNoCurrPicRefConstraintFlag = bVal; } + bool getNoTransformSkipConstraintFlag() const { return m_noTransformSkipConstraintFlag; } + void setNoTransformSkipConstraintFlag(bool bVal) { m_noTransformSkipConstraintFlag = bVal; } + bool getNoBDPCMConstraintFlag() const { return m_noBDPCMConstraintFlag; } + void setNoBDPCMConstraintFlag(bool bVal) { m_noBDPCMConstraintFlag = bVal; } + bool getNoJointCbCrConstraintFlag() const { return m_noJointCbCrConstraintFlag; } + void setNoJointCbCrConstraintFlag(bool bVal) { m_noJointCbCrConstraintFlag = bVal; } bool getNoQpDeltaConstraintFlag() const { return m_bNoQpDeltaConstraintFlag; } void setNoQpDeltaConstraintFlag(bool bVal) { m_bNoQpDeltaConstraintFlag = bVal; } bool getNoDepQuantConstraintFlag() const { return m_bNoDepQuantConstraintFlag; } void setNoDepQuantConstraintFlag(bool bVal) { m_bNoDepQuantConstraintFlag = bVal; } bool getNoSignDataHidingConstraintFlag() const { return m_bNoSignDataHidingConstraintFlag; } void setNoSignDataHidingConstraintFlag(bool bVal) { m_bNoSignDataHidingConstraintFlag = bVal; } + bool getNoTrailConstraintFlag() const { return m_noTrailConstraintFlag; } + void setNoTrailConstraintFlag(bool bVal) { m_noTrailConstraintFlag = bVal; } + bool getNoStsaConstraintFlag() const { return m_noStsaConstraintFlag; } + void setNoStsaConstraintFlag(bool bVal) { m_noStsaConstraintFlag = bVal; } + bool getNoRaslConstraintFlag() const { return m_noRaslConstraintFlag; } + void setNoRaslConstraintFlag(bool bVal) { m_noRaslConstraintFlag = bVal; } + bool getNoRadlConstraintFlag() const { return m_noRadlConstraintFlag; } + void setNoRadlConstraintFlag(bool bVal) { m_noRadlConstraintFlag = bVal; } + bool getNoIdrConstraintFlag() const { return m_noIdrConstraintFlag; } + void setNoIdrConstraintFlag(bool bVal) { m_noIdrConstraintFlag = bVal; } + bool getNoCraConstraintFlag() const { return 
m_noCraConstraintFlag; } + void setNoCraConstraintFlag(bool bVal) { m_noCraConstraintFlag = bVal; } + bool getNoGdrConstraintFlag() const { return m_noGdrConstraintFlag; } + void setNoGdrConstraintFlag(bool bVal) { m_noGdrConstraintFlag = bVal; } + bool getNoApsConstraintFlag() const { return m_noApsConstraintFlag; } + void setNoApsConstraintFlag(bool bVal) { m_noApsConstraintFlag = bVal; } + void setFrameRate ( int i ) { m_iFrameRate = i; } void setFrameSkip ( uint32_t i ) { m_FrameSkip = i; } @@ -662,24 +821,46 @@ public: void setCabacZeroWordPaddingEnabled(bool value) { m_cabacZeroWordPaddingEnabled = value; } //====== Coding Structure ======== - void setIntraPeriod ( int i ) { m_uiIntraPeriod = (uint32_t)i; } + void setIntraPeriod (int i) { m_uiIntraPeriod = i; } void setDecodingRefreshType ( int i ) { m_uiDecodingRefreshType = (uint32_t)i; } -#if JCTVC_Y0038_PARAMS void setReWriteParamSets ( bool b ) { m_rewriteParamSets = b; } -#endif + void setIDRRefParamListPresent ( bool b ) { m_idrRefParamList = b; } + bool getIDRRefParamListPresent () const { return m_idrRefParamList; } void setGOPSize ( int i ) { m_iGOPSize = i; } - void setGopList ( const GOPEntry GOPList[MAX_GOP] ) { for ( int i = 0; i < MAX_GOP; i++ ) m_GOPList[i] = GOPList[i]; } - void setExtraRPSs ( int i ) { m_extraRPSs = i; } + void setGopList(const GOPEntry GOPList[MAX_GOP]) { for (int i = 0; i < MAX_GOP; i++) m_GOPList[i] = GOPList[i]; } const GOPEntry &getGOPEntry ( int i ) const { return m_GOPList[i]; } - void setEncodedFlag ( int i, bool value ) { m_GOPList[i].m_isEncoded = value; } + void setRPLList0(const RPLEntry RPLList[MAX_GOP]) + { + m_numRPLList0 = 0; + for (int i = 0; i < MAX_GOP; i++) + { + m_RPLList0[i] = RPLList[i]; + if (m_RPLList0[i].m_POC != -1) m_numRPLList0++; + } + } + void setRPLList1(const RPLEntry RPLList[MAX_GOP]) + { + m_numRPLList1 = 0; + for (int i = 0; i < MAX_GOP; i++) + { + m_RPLList1[i] = RPLList[i]; + if (m_RPLList1[i].m_POC != -1) m_numRPLList1++; + } + } + 
const RPLEntry &getRPLEntry(int L01, int idx) const { return (L01 == 0) ? m_RPLList0[idx] : m_RPLList1[idx]; } + int getRPLCandidateSize(int L01) const { return (L01 == 0) ? m_numRPLList0 : m_numRPLList1; } + void setEncodedFlag(uint32_t i, bool value) { m_RPLList0[i].m_isEncoded = value; m_RPLList1[i].m_isEncoded = value; m_GOPList[i].m_isEncoded = value; } void setMaxDecPicBuffering ( uint32_t u, uint32_t tlayer ) { m_maxDecPicBuffering[tlayer] = u; } void setNumReorderPics ( int i, uint32_t tlayer ) { m_numReorderPics[tlayer] = i; } + void setDrapPeriod (int drapPeriod) { m_drapPeriod = drapPeriod; } void setBaseQP ( int i ) { m_iQP = i; } #if X0038_LAMBDA_FROM_QP_CAPABILITY void setIntraQPOffset ( int i ) { m_intraQPOffset = i; } void setLambdaFromQPEnable ( bool b ) { m_lambdaFromQPEnable = b; } #endif + void setChromaQpMappingTableParams (const ChromaQpMappingTableParams ¶ms) { m_chromaQpMappingTableParams = params; } + void setPad ( int* iPad ) { for ( int i = 0; i < 2; i++ ) m_aiPad[i] = iPad[i]; } int getMaxRefPicNum () { return m_iMaxRefPicNum; } @@ -690,21 +871,52 @@ public: void setCTUSize ( unsigned u ) { m_CTUSize = u; } void setMinQTSizes ( unsigned* minQT) { m_uiMinQT[0] = minQT[0]; m_uiMinQT[1] = minQT[1]; m_uiMinQT[2] = minQT[2]; } - void setMaxBTDepth ( unsigned uiMaxBTDepth, unsigned uiMaxBTDepthI, unsigned uiMaxBTDepthIChroma ) - { m_uiMaxBTDepth = uiMaxBTDepth; m_uiMaxBTDepthI = uiMaxBTDepthI; m_uiMaxBTDepthIChroma = uiMaxBTDepthIChroma; } - unsigned getMaxBTDepth () const { return m_uiMaxBTDepth; } - unsigned getMaxBTDepthI () const { return m_uiMaxBTDepthI; } - unsigned getMaxBTDepthIChroma () const { return m_uiMaxBTDepthIChroma; } + void setMaxMTTHierarchyDepth ( unsigned uiMaxMTTHierarchyDepth, unsigned uiMaxMTTHierarchyDepthI, unsigned uiMaxMTTHierarchyDepthIChroma ) + { m_uiMaxMTTHierarchyDepth = uiMaxMTTHierarchyDepth; m_uiMaxMTTHierarchyDepthI = uiMaxMTTHierarchyDepthI; m_uiMaxMTTHierarchyDepthIChroma = uiMaxMTTHierarchyDepthIChroma; 
} + unsigned getMaxMTTHierarchyDepth () const { return m_uiMaxMTTHierarchyDepth; } + unsigned getMaxMTTHierarchyDepthI () const { return m_uiMaxMTTHierarchyDepthI; } + unsigned getMaxMTTHierarchyDepthIChroma () const { return m_uiMaxMTTHierarchyDepthIChroma; } int getCTUSize () const { return m_CTUSize; } void setUseSplitConsOverride (bool n) { m_useSplitConsOverride = n; } bool getUseSplitConsOverride () const { return m_useSplitConsOverride; } void setDualITree ( bool b ) { m_dualITree = b; } bool getDualITree () const { return m_dualITree; } + void setSubPicPresentFlag (bool b) { m_subPicPresentFlag = b; } + void setNumSubPics (uint32_t u) { m_numSubPics = u; } + void setSubPicCtuTopLeftX (uint32_t u, int i) { m_subPicCtuTopLeftX[i] = u; } + void setSubPicCtuTopLeftY (uint32_t u, int i) { m_subPicCtuTopLeftY[i] = u; } + void setSubPicWidth (uint32_t u, int i) { m_subPicWidth[i] = u; } + void setSubPicHeight (uint32_t u, int i) { m_subPicHeight[i] = u; } + void setSubPicTreatedAsPicFlag (bool b, int i) { m_subPicTreatedAsPicFlag[i] = b; } + void setLoopFilterAcrossSubpicEnabledFlag (uint32_t u, int i) { m_loopFilterAcrossSubpicEnabledFlag[i] = u; } + void setSubPicIdPresentFlag (bool b) { m_subPicIdPresentFlag = b; } + void setSubPicIdSignallingPresentFlag (bool b) { m_subPicIdSignallingPresentFlag = b; } + void setSubPicIdLen (uint32_t u) { m_subPicIdLen = u; } + void setSubPicId (uint32_t b, int i) { m_subPicId[i] = b; } + + bool getSubPicPresentFlag () { return m_subPicPresentFlag; } + uint32_t getNumSubPics () { return m_numSubPics; } + uint32_t getSubPicCtuTopLeftX (int i) { return m_subPicCtuTopLeftX[i]; } + uint32_t getSubPicCtuTopLeftY (int i) { return m_subPicCtuTopLeftY[i]; } + uint32_t getSubPicWidth (int i) { return m_subPicWidth[i]; } + uint32_t getSubPicHeight (int i) { return m_subPicHeight[i]; } + bool getSubPicTreatedAsPicFlag (int i) { return m_subPicTreatedAsPicFlag[i]; } + uint32_t getLoopFilterAcrossSubpicEnabledFlag (int i) { return 
m_loopFilterAcrossSubpicEnabledFlag[i]; } + bool getSubPicIdPresentFlag () { return m_subPicIdPresentFlag; } + bool getSubPicIdSignallingPresentFlag () { return m_subPicIdSignallingPresentFlag; } + uint32_t getSubPicIdLen () { return m_subPicIdLen; } + uint32_t getSubPicId (int i) { return m_subPicId[i]; } + void setLFNST ( bool b ) { m_LFNST = b; } + bool getLFNST() const { return m_LFNST; } + void setUseFastLFNST ( bool b ) { m_useFastLFNST = b; } + bool getUseFastLFNST() const { return m_useFastLFNST; } void setUseLMChroma ( int n ) { m_LMChroma = n; } int getUseLMChroma() const { return m_LMChroma; } - void setCclmCollocatedChromaFlag ( bool b ) { m_cclmCollocatedChromaFlag = b; } - bool getCclmCollocatedChromaFlag () const { return m_cclmCollocatedChromaFlag; } + void setHorCollocatedChromaFlag( bool b ) { m_horCollocatedChromaFlag = b; } + bool getHorCollocatedChromaFlag() const { return m_horCollocatedChromaFlag; } + void setVerCollocatedChromaFlag( bool b ) { m_verCollocatedChromaFlag = b; } + bool getVerCollocatedChromaFlag() const { return m_verCollocatedChromaFlag; } void setSubPuMvpMode(int n) { m_SubPuMvpMode = n; } bool getSubPuMvpMode() const { return m_SubPuMvpMode; } @@ -713,13 +925,15 @@ public: bool getAffine () const { return m_Affine; } void setAffineType( bool b ) { m_AffineType = b; } bool getAffineType() const { return m_AffineType; } + void setPROF (bool b) { m_PROF = b; } + bool getPROF () const { return m_PROF; } void setBIO(bool b) { m_BIO = b; } bool getBIO() const { return m_BIO; } - void setIntraMTSMaxCand ( unsigned u ) { m_IntraMTSMaxCand = u; } - unsigned getIntraMTSMaxCand () const { return m_IntraMTSMaxCand; } - void setInterMTSMaxCand ( unsigned u ) { m_InterMTSMaxCand = u; } - unsigned getInterMTSMaxCand () const { return m_InterMTSMaxCand; } + void setMTSIntraMaxCand ( unsigned u ) { m_MTSIntraMaxCand = u; } + unsigned getMTSIntraMaxCand () const { return m_MTSIntraMaxCand; } + void setMTSInterMaxCand ( unsigned u ) { 
m_MTSInterMaxCand = u; } + unsigned getMTSInterMaxCand () const { return m_MTSInterMaxCand; } void setIntraMTS ( bool b ) { m_IntraMTS = b; } bool getIntraMTS () const { return m_IntraMTS; } void setInterMTS ( bool b ) { m_InterMTS = b; } @@ -729,12 +943,17 @@ public: void setUseSBT ( bool b ) { m_SBT = b; } bool getUseSBT () const { return m_SBT; } + void setSBTFast64WidthTh ( int b ) { m_SBTFast64WidthTh = b; } + int getSBTFast64WidthTh () const { return m_SBTFast64WidthTh; } + void setUseCompositeRef (bool b) { m_compositeRefEnabled = b; } bool getUseCompositeRef () const { return m_compositeRefEnabled; } - void setUseGBi ( bool b ) { m_GBi = b; } - bool getUseGBi () const { return m_GBi; } - void setUseGBiFast ( uint32_t b ) { m_GBiFast = b; } - bool getUseGBiFast () const { return m_GBiFast; } + void setUseSMVD ( bool b ) { m_SMVD = b; } + bool getUseSMVD () const { return m_SMVD; } + void setUseBcw ( bool b ) { m_bcw = b; } + bool getUseBcw () const { return m_bcw; } + void setUseBcwFast ( uint32_t b ) { m_BcwFast = b; } + bool getUseBcwFast () const { return m_BcwFast; } #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET void setUseLadf ( bool b ) { m_LadfEnabled = b; } @@ -748,8 +967,8 @@ public: #endif - void setUseMHIntra ( bool b ) { m_MHIntra = b; } - bool getUseMHIntra () const { return m_MHIntra; } + void setUseCiip ( bool b ) { m_ciip = b; } + bool getUseCiip () const { return m_ciip; } void setUseTriangle ( bool b ) { m_Triangle = b; } bool getUseTriangle () const { return m_Triangle; } void setAllowDisFracMMVD ( bool b ) { m_allowDisFracMMVD = b; } @@ -762,7 +981,18 @@ public: bool getUseAffineAmvrEncOpt () const { return m_AffineAmvrEncOpt; } void setDMVR ( bool b ) { m_DMVR = b; } bool getDMVR () const { return m_DMVR; } - + void setMMVD (bool b) { m_MMVD = b; } + bool getMMVD () const { return m_MMVD; } + void setMmvdDisNum ( int b ) { m_MmvdDisNum = b; } + int getMmvdDisNum () const { return m_MmvdDisNum; } + void setRGBFormatFlag(bool value) { 
m_rgbFormat = value; } + bool getRGBFormatFlag() const { return m_rgbFormat; } + void setUseColorTrans(bool value) { m_useColorTrans = value; } + bool getUseColorTrans() const { return m_useColorTrans; } + void setPLTMode ( unsigned n) { m_PLTMode = n; } + unsigned getPLTMode () const { return m_PLTMode; } + void setJointCbCr ( bool b ) { m_JointCbCrMode = b; } + bool getJointCbCr () const { return m_JointCbCrMode; } void setIBCMode (unsigned n) { m_IBCMode = n; } unsigned getIBCMode () const { return m_IBCMode; } void setIBCLocalSearchRangeX (unsigned n) { m_IBCLocalSearchRangeX = n; } @@ -784,15 +1014,28 @@ public: unsigned getWrapAroundOffset () const { return m_wrapAroundOffset; } // ADD_NEW_TOOL : (encoder lib) add access functions here - - void setReshaper ( bool b ) { m_lumaReshapeEnable = b; } - bool getReshaper () const { return m_lumaReshapeEnable; } + void setLoopFilterAcrossVirtualBoundariesDisabledFlag( bool b ) { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b; } + bool getLoopFilterAcrossVirtualBoundariesDisabledFlag() const { return m_loopFilterAcrossVirtualBoundariesDisabledFlag; } + void setNumVerVirtualBoundaries ( unsigned u ) { m_numVerVirtualBoundaries = u; } + unsigned getNumVerVirtualBoundaries () const { return m_numVerVirtualBoundaries; } + void setNumHorVirtualBoundaries ( unsigned u ) { m_numHorVirtualBoundaries = u; } + unsigned getNumHorVirtualBoundaries () const { return m_numHorVirtualBoundaries; } + void setVirtualBoundariesPosX ( unsigned u, unsigned idx ) { m_virtualBoundariesPosX[idx] = u; } + unsigned getVirtualBoundariesPosX ( unsigned idx ) const { return m_virtualBoundariesPosX[idx]; } + void setVirtualBoundariesPosY ( unsigned u, unsigned idx ) { m_virtualBoundariesPosY[idx] = u; } + unsigned getVirtualBoundariesPosY ( unsigned idx ) const { return m_virtualBoundariesPosY[idx]; } + void setUseISP ( bool b ) { m_ISP = b; } + bool getUseISP () const { return m_ISP; } + void setLmcs ( bool b ) { m_lmcsEnabled = b; } + bool 
getLmcs () const { return m_lmcsEnabled; } void setReshapeSignalType ( uint32_t signalType ) { m_reshapeSignalType = signalType; } uint32_t getReshapeSignalType () const { return m_reshapeSignalType; } void setReshapeIntraCMD (uint32_t intraCMD) { m_intraCMD = intraCMD; } uint32_t getReshapeIntraCMD () { return m_intraCMD; } void setReshapeCW (const ReshapeCW &reshapeCW) { m_reshapeCW = reshapeCW; } const ReshapeCW& getReshapeCW () { return m_reshapeCW; } + void setReshapeCSoffset (int CSoffset) { m_CSoffset = CSoffset; } + int getReshapeCSoffset () { return m_CSoffset; } void setMaxCUWidth ( uint32_t u ) { m_maxCUWidth = u; } uint32_t getMaxCUWidth () const { return m_maxCUWidth; } void setMaxCUHeight ( uint32_t u ) { m_maxCUHeight = u; } @@ -816,10 +1059,22 @@ public: bool getUseE0023FastEnc () const { return m_e0023FastEnc; } void setUseContentBasedFastQtbt ( bool b ) { m_contentBasedFastQtbt = b; } bool getUseContentBasedFastQtbt () const { return m_contentBasedFastQtbt; } + void setUseNonLinearAlfLuma ( bool b ) { m_useNonLinearAlfLuma = b; } + bool getUseNonLinearAlfLuma () const { return m_useNonLinearAlfLuma; } + void setUseNonLinearAlfChroma ( bool b ) { m_useNonLinearAlfChroma = b; } + bool getUseNonLinearAlfChroma () const { return m_useNonLinearAlfChroma; } + void setMaxNumAlfAlternativesChroma ( uint32_t u ) { m_maxNumAlfAlternativesChroma = u; } + uint32_t getMaxNumAlfAlternativesChroma () const { return m_maxNumAlfAlternativesChroma; } + void setUseMRL ( bool b ) { m_MRL = b; } + bool getUseMRL () const { return m_MRL; } + void setUseMIP ( bool b ) { m_MIP = b; } + bool getUseMIP () const { return m_MIP; } + void setUseFastMIP ( bool b ) { m_useFastMIP = b; } + bool getUseFastMIP () const { return m_useFastMIP; } + void setFastLocalDualTreeMode ( int i ) { m_fastLocalDualTreeMode = i; } + int getFastLocalDualTreeMode () const { return m_fastLocalDualTreeMode; } -#if MAX_TB_SIZE_SIGNALLING void setLog2MaxTbSize ( uint32_t u ) { m_log2MaxTbSize = u; } 
-#endif //====== Loop/Deblock Filter ======== void setLoopFilterDisable ( bool b ) { m_bLoopFilterDisable = b; } @@ -853,6 +1108,9 @@ public: void setChromaCrQpOffsetDualTree ( int i ) { m_chromaCrQpOffsetDualTree = i; } int getChromaCbQpOffsetDualTree () const { return m_chromaCbQpOffsetDualTree; } int getChromaCrQpOffsetDualTree () const { return m_chromaCrQpOffsetDualTree; } + void setChromaCbCrQpOffset ( int i ) { m_chromaCbCrQpOffset = i; } + void setChromaCbCrQpOffsetDualTree ( int i ) { m_chromaCbCrQpOffsetDualTree = i; } + int getChromaCbCrQpOffsetDualTree () const { return m_chromaCbCrQpOffsetDualTree; } #if ER_CHROMA_QP_WCG_PPS void setWCGChromaQpControl ( const WCGChromaQPControl &ctrl ) { m_wcgChromaQpControl = ctrl; } const WCGChromaQPControl &getWCGChromaQPControl () const { return m_wcgChromaQpControl; } @@ -907,12 +1165,11 @@ public: //==== Coding Structure ======== uint32_t getIntraPeriod () const { return m_uiIntraPeriod; } uint32_t getDecodingRefreshType () const { return m_uiDecodingRefreshType; } -#if JCTVC_Y0038_PARAMS bool getReWriteParamSets () const { return m_rewriteParamSets; } -#endif int getGOPSize () const { return m_iGOPSize; } int getMaxDecPicBuffering (uint32_t tlayer) { return m_maxDecPicBuffering[tlayer]; } int getNumReorderPics (uint32_t tlayer) { return m_numReorderPics[tlayer]; } + int getDrapPeriod () { return m_drapPeriod; } #if X0038_LAMBDA_FROM_QP_CAPABILITY int getIntraQPOffset () const { return m_intraQPOffset; } int getLambdaFromQPEnable () const { return m_lambdaFromQPEnable; } @@ -960,6 +1217,7 @@ public: //==== Tool list ======== void setBitDepth( const ChannelType chType, int internalBitDepthForChannel ) { m_bitDepth[chType] = internalBitDepthForChannel; } void setInputBitDepth( const ChannelType chType, int internalBitDepthForChannel ) { m_inputBitDepth[chType] = internalBitDepthForChannel; } + int* getInputBitDepth() { return m_inputBitDepth; } void setUseASR ( bool b ) { m_bUseASR = b; } void setUseHADME ( bool b 
) { m_bUseHADME = b; } void setUseRDOQ ( bool b ) { m_useRDOQ = b; } @@ -973,21 +1231,15 @@ public: void setUseFastDecisionForMerge ( bool b ) { m_useFastDecisionForMerge = b; } void setUseCbfFastMode ( bool b ) { m_bUseCbfFastMode = b; } void setUseEarlySkipDetection ( bool b ) { m_useEarlySkipDetection = b; } - void setUseConstrainedIntraPred ( bool b ) { m_bUseConstrainedIntraPred = b; } void setFastUDIUseMPMEnabled ( bool b ) { m_bFastUDIUseMPMEnabled = b; } void setFastMEForGenBLowDelayEnabled ( bool b ) { m_bFastMEForGenBLowDelayEnabled = b; } void setUseBLambdaForNonKeyLowDelayPictures ( bool b ) { m_bUseBLambdaForNonKeyLowDelayPictures = b; } - void setPCMInputBitDepthFlag ( bool b ) { m_bPCMInputBitDepthFlag = b; } - void setPCMFilterDisableFlag ( bool b ) { m_bPCMFilterDisableFlag = b; } - void setUsePCM ( bool b ) { m_usePCM = b; } - void setPCMBitDepth( const ChannelType chType, int pcmBitDepthForChannel ) { m_PCMBitDepth[chType] = pcmBitDepthForChannel; } - void setPCMLog2MaxSize ( uint32_t u ) { m_pcmLog2MaxSize = u; } - void setPCMLog2MinSize ( uint32_t u ) { m_uiPCMLog2MinSize = u; } void setdQPs ( int* p ) { m_aidQP = p; } void setDeltaQpRD ( uint32_t u ) {m_uiDeltaQpRD = u; } void setFastDeltaQp ( bool b ) {m_bFastDeltaQP = b; } int getBitDepth (const ChannelType chType) const { return m_bitDepth[chType]; } + int* getBitDepth () { return m_bitDepth; } bool getUseASR () { return m_bUseASR; } bool getUseHADME () { return m_bUseHADME; } bool getUseRDOQ () { return m_useRDOQ; } @@ -1001,15 +1253,11 @@ public: bool getUseFastDecisionForMerge () const{ return m_useFastDecisionForMerge; } bool getUseCbfFastMode () const{ return m_bUseCbfFastMode; } bool getUseEarlySkipDetection () const{ return m_useEarlySkipDetection; } - bool getUseConstrainedIntraPred () { return m_bUseConstrainedIntraPred; } bool getFastUDIUseMPMEnabled () { return m_bFastUDIUseMPMEnabled; } bool getFastMEForGenBLowDelayEnabled () { return m_bFastMEForGenBLowDelayEnabled; } bool 
getUseBLambdaForNonKeyLowDelayPictures () { return m_bUseBLambdaForNonKeyLowDelayPictures; } - bool getPCMInputBitDepthFlag () { return m_bPCMInputBitDepthFlag; } - bool getPCMFilterDisableFlag () { return m_bPCMFilterDisableFlag; } - bool getUsePCM () { return m_usePCM; } - uint32_t getPCMLog2MaxSize () { return m_pcmLog2MaxSize; } - uint32_t getPCMLog2MinSize () { return m_uiPCMLog2MinSize; } + void setGopBasedTemporalFilterEnabled(bool flag) { m_gopBasedTemporalFilterEnabled = flag; } + bool getGopBasedTemporalFilterEnabled() { return m_gopBasedTemporalFilterEnabled; } bool getCrossComponentPredictionEnabledFlag () const { return m_crossComponentPredictionEnabledFlag; } void setCrossComponentPredictionEnabledFlag (const bool value) { m_crossComponentPredictionEnabledFlag = value; } @@ -1023,6 +1271,12 @@ public: void setTransformSkipRotationEnabledFlag (const bool value) { m_transformSkipRotationEnabledFlag = value; } bool getTransformSkipContextEnabledFlag () const { return m_transformSkipContextEnabledFlag; } void setTransformSkipContextEnabledFlag (const bool value) { m_transformSkipContextEnabledFlag = value; } + bool getUseChromaTS () { return m_useChromaTS; } + void setUseChromaTS (bool b) { m_useChromaTS = b; } + int getUseBDPCM () { return m_useBDPCM; } + void setUseBDPCM ( int b ) { m_useBDPCM = b; } + bool getUseJointCbCr () { return m_JointCbCrMode; } + void setUseJointCbCr (bool b) { m_JointCbCrMode = b; } bool getPersistentRiceAdaptationEnabledFlag () const { return m_persistentRiceAdaptationEnabledFlag; } void setPersistentRiceAdaptationEnabledFlag (const bool value) { m_persistentRiceAdaptationEnabledFlag = value; } bool getCabacBypassAlignmentEnabledFlag () const { return m_cabacBypassAlignmentEnabledFlag; } @@ -1035,26 +1289,37 @@ public: void setLog2MaxTransformSkipBlockSize ( uint32_t u ) { m_log2MaxTransformSkipBlockSize = u; } bool getIntraSmoothingDisabledFlag () const { return m_intraSmoothingDisabledFlag; } void 
setIntraSmoothingDisabledFlag (bool bValue) { m_intraSmoothingDisabledFlag=bValue; } - bool getUseFastISP () { return m_useFastISP; } + bool getUseFastISP () const { return m_useFastISP; } void setUseFastISP ( bool b ) { m_useFastISP = b; } const int* getdQPs () const { return m_aidQP; } uint32_t getDeltaQpRD () const { return m_uiDeltaQpRD; } bool getFastDeltaQp () const { return m_bFastDeltaQP; } - //====== Slice ======== - void setSliceMode ( SliceConstraint i ) { m_sliceMode = i; } - void setSliceArgument ( int i ) { m_sliceArgument = i; } - SliceConstraint getSliceMode () const { return m_sliceMode; } - int getSliceArgument () { return m_sliceArgument; } - //====== Dependent Slice ======== - void setSliceSegmentMode ( SliceConstraint i ) { m_sliceSegmentMode = i; } - void setSliceSegmentArgument ( int i ) { m_sliceSegmentArgument = i; } - SliceConstraint getSliceSegmentMode () const { return m_sliceSegmentMode; } - int getSliceSegmentArgument () { return m_sliceSegmentArgument;} - void setLFCrossSliceBoundaryFlag ( bool bValue ) { m_bLFCrossSliceBoundaryFlag = bValue; } - bool getLFCrossSliceBoundaryFlag () { return m_bLFCrossSliceBoundaryFlag; } - + //====== Tiles and Slices ======== + void setNoPicPartitionFlag( bool b ) { m_noPicPartitionFlag = b; } + bool getNoPicPartitionFlag() { return m_noPicPartitionFlag; } + void setTileColWidths( std::vector<uint32_t> tileColWidths ) { m_tileColumnWidth = tileColWidths; } + const std::vector<uint32_t>* getTileColWidths() const { return &m_tileColumnWidth; } + void setTileRowHeights( std::vector<uint32_t> tileRowHeights ) { m_tileRowHeight = tileRowHeights; } + const std::vector<uint32_t>* getTileRowHeights() const { return &m_tileRowHeight; } + void setRectSliceFlag( bool b ) { m_rectSliceFlag = b; } + bool getRectSliceFlag() { return m_rectSliceFlag; } + void setNumSlicesInPic( uint32_t u ) { m_numSlicesInPic = u; } + uint32_t getNumSlicesInPic() { return m_numSlicesInPic; } + void setTileIdxDeltaPresentFlag( bool b 
) { m_tileIdxDeltaPresentFlag = b; } + bool getTileIdxDeltaPresentFlag() { return m_tileIdxDeltaPresentFlag; } + void setRectSlices( std::vector<RectSlice> rectSlices ) { m_rectSlices = rectSlices; } + const std::vector<RectSlice>* getRectSlices() const { return &m_rectSlices; } + void setRasterSliceSizes( std::vector<uint32_t> rasterSliceSizes ) { m_rasterSliceSize = rasterSliceSizes; } + const std::vector<uint32_t>* getRasterSliceSizes() const { return &m_rasterSliceSize; } + void setLFCrossTileBoundaryFlag( bool b ) { m_bLFCrossTileBoundaryFlag = b; } + bool getLFCrossTileBoundaryFlag() { return m_bLFCrossTileBoundaryFlag; } + void setLFCrossSliceBoundaryFlag( bool b ) { m_bLFCrossSliceBoundaryFlag = b; } + bool getLFCrossSliceBoundaryFlag() { return m_bLFCrossSliceBoundaryFlag; } + //====== Sub-picture and Slices ======== + void setSingleSlicePerSubPicFlagFlag( bool b ) { m_singleSlicePerSubPicFlag = b; } + bool getSingleSlicePerSubPicFlagFlag( ) { return m_singleSlicePerSubPicFlag; } void setUseSAO (bool bVal) { m_bUseSAO = bVal; } bool getUseSAO () { return m_bUseSAO; } void setTestSAODisableAtPictureLevel (bool bVal) { m_bTestSAODisableAtPictureLevel = bVal; } @@ -1069,91 +1334,20 @@ public: void setSaoCtuBoundary (bool val) { m_saoCtuBoundary = val; } bool getSaoCtuBoundary () { return m_saoCtuBoundary; } -#if K0238_SAO_GREEDY_MERGE_ENCODING void setSaoGreedyMergeEnc (bool val) { m_saoGreedyMergeEnc = val; } bool getSaoGreedyMergeEnc () { return m_saoGreedyMergeEnc; } -#endif -#if HEVC_TILES_WPP - void setLFCrossTileBoundaryFlag ( bool val ) { m_loopFilterAcrossTilesEnabledFlag = val; } - bool getLFCrossTileBoundaryFlag () { return m_loopFilterAcrossTilesEnabledFlag; } - void setTileUniformSpacingFlag ( bool b ) { m_tileUniformSpacingFlag = b; } - bool getTileUniformSpacingFlag () { return m_tileUniformSpacingFlag; } - void setNumColumnsMinus1 ( int i ) { m_iNumColumnsMinus1 = i; } - int getNumColumnsMinus1 () { return m_iNumColumnsMinus1; } - void 
setColumnWidth ( const std::vector<int>& columnWidth ) { m_tileColumnWidth = columnWidth; } - uint32_t getColumnWidth ( uint32_t columnIdx ) { return m_tileColumnWidth[columnIdx]; } - void setNumRowsMinus1 ( int i ) { m_iNumRowsMinus1 = i; } - int getNumRowsMinus1 () { return m_iNumRowsMinus1; } - void setRowHeight ( const std::vector<int>& rowHeight) { m_tileRowHeight = rowHeight; } - uint32_t getRowHeight ( uint32_t rowIdx ) { return m_tileRowHeight[rowIdx]; } -#endif - void xCheckGSParameters(); -#if HEVC_TILES_WPP void setEntropyCodingSyncEnabledFlag(bool b) { m_entropyCodingSyncEnabledFlag = b; } bool getEntropyCodingSyncEnabledFlag() const { return m_entropyCodingSyncEnabledFlag; } -#endif void setDecodedPictureHashSEIType(HashType m) { m_decodedPictureHashSEIType = m; } HashType getDecodedPictureHashSEIType() const { return m_decodedPictureHashSEIType; } void setBufferingPeriodSEIEnabled(bool b) { m_bufferingPeriodSEIEnabled = b; } bool getBufferingPeriodSEIEnabled() const { return m_bufferingPeriodSEIEnabled; } void setPictureTimingSEIEnabled(bool b) { m_pictureTimingSEIEnabled = b; } bool getPictureTimingSEIEnabled() const { return m_pictureTimingSEIEnabled; } - void setRecoveryPointSEIEnabled(bool b) { m_recoveryPointSEIEnabled = b; } - bool getRecoveryPointSEIEnabled() const { return m_recoveryPointSEIEnabled; } - void setToneMappingInfoSEIEnabled(bool b) { m_toneMappingInfoSEIEnabled = b; } - bool getToneMappingInfoSEIEnabled() { return m_toneMappingInfoSEIEnabled; } - void setTMISEIToneMapId(int b) { m_toneMapId = b; } - int getTMISEIToneMapId() { return m_toneMapId; } - void setTMISEIToneMapCancelFlag(bool b) { m_toneMapCancelFlag=b; } - bool getTMISEIToneMapCancelFlag() { return m_toneMapCancelFlag; } - void setTMISEIToneMapPersistenceFlag(bool b) { m_toneMapPersistenceFlag = b; } - bool getTMISEIToneMapPersistenceFlag() { return m_toneMapPersistenceFlag; } - void setTMISEICodedDataBitDepth(int b) { m_codedDataBitDepth = b; } - int 
getTMISEICodedDataBitDepth() { return m_codedDataBitDepth; } - void setTMISEITargetBitDepth(int b) { m_targetBitDepth = b; } - int getTMISEITargetBitDepth() { return m_targetBitDepth; } - void setTMISEIModelID(int b) { m_modelId = b; } - int getTMISEIModelID() { return m_modelId; } - void setTMISEIMinValue(int b) { m_minValue = b; } - int getTMISEIMinValue() { return m_minValue; } - void setTMISEIMaxValue(int b) { m_maxValue = b; } - int getTMISEIMaxValue() { return m_maxValue; } - void setTMISEISigmoidMidpoint(int b) { m_sigmoidMidpoint = b; } - int getTMISEISigmoidMidpoint() { return m_sigmoidMidpoint; } - void setTMISEISigmoidWidth(int b) { m_sigmoidWidth = b; } - int getTMISEISigmoidWidth() { return m_sigmoidWidth; } - void setTMISEIStartOfCodedInterva( int* p ) { m_startOfCodedInterval = p; } - int* getTMISEIStartOfCodedInterva() { return m_startOfCodedInterval; } - void setTMISEINumPivots(int b) { m_numPivots = b; } - int getTMISEINumPivots() { return m_numPivots; } - void setTMISEICodedPivotValue( int* p ) { m_codedPivotValue = p; } - int* getTMISEICodedPivotValue() { return m_codedPivotValue; } - void setTMISEITargetPivotValue( int* p ) { m_targetPivotValue = p; } - int* getTMISEITargetPivotValue() { return m_targetPivotValue; } - void setTMISEICameraIsoSpeedIdc(int b) { m_cameraIsoSpeedIdc = b; } - int getTMISEICameraIsoSpeedIdc() { return m_cameraIsoSpeedIdc; } - void setTMISEICameraIsoSpeedValue(int b) { m_cameraIsoSpeedValue = b; } - int getTMISEICameraIsoSpeedValue() { return m_cameraIsoSpeedValue; } - void setTMISEIExposureIndexIdc(int b) { m_exposureIndexIdc = b; } - int getTMISEIExposurIndexIdc() { return m_exposureIndexIdc; } - void setTMISEIExposureIndexValue(int b) { m_exposureIndexValue = b; } - int getTMISEIExposurIndexValue() { return m_exposureIndexValue; } - void setTMISEIExposureCompensationValueSignFlag(bool b) { m_exposureCompensationValueSignFlag = b; } - bool getTMISEIExposureCompensationValueSignFlag() { return 
m_exposureCompensationValueSignFlag; } - void setTMISEIExposureCompensationValueNumerator(int b) { m_exposureCompensationValueNumerator = b; } - int getTMISEIExposureCompensationValueNumerator() { return m_exposureCompensationValueNumerator; } - void setTMISEIExposureCompensationValueDenomIdc(int b) { m_exposureCompensationValueDenomIdc =b; } - int getTMISEIExposureCompensationValueDenomIdc() { return m_exposureCompensationValueDenomIdc; } - void setTMISEIRefScreenLuminanceWhite(int b) { m_refScreenLuminanceWhite = b; } - int getTMISEIRefScreenLuminanceWhite() { return m_refScreenLuminanceWhite; } - void setTMISEIExtendedRangeWhiteLevel(int b) { m_extendedRangeWhiteLevel = b; } - int getTMISEIExtendedRangeWhiteLevel() { return m_extendedRangeWhiteLevel; } - void setTMISEINominalBlackLevelLumaCodeValue(int b) { m_nominalBlackLevelLumaCodeValue = b; } - int getTMISEINominalBlackLevelLumaCodeValue() { return m_nominalBlackLevelLumaCodeValue; } - void setTMISEINominalWhiteLevelLumaCodeValue(int b) { m_nominalWhiteLevelLumaCodeValue = b; } - int getTMISEINominalWhiteLevelLumaCodeValue() { return m_nominalWhiteLevelLumaCodeValue; } - void setTMISEIExtendedWhiteLevelLumaCodeValue(int b) { m_extendedWhiteLevelLumaCodeValue =b; } - int getTMISEIExtendedWhiteLevelLumaCodeValue() { return m_extendedWhiteLevelLumaCodeValue; } + void setFrameFieldInfoSEIEnabled(bool b) { m_frameFieldInfoSEIEnabled = b; } + bool getFrameFieldInfoSEIEnabled() const { return m_frameFieldInfoSEIEnabled; } + void setDependentRAPIndicationSEIEnabled(bool b) { m_dependentRAPIndicationSEIEnabled = b; } + int getDependentRAPIndicationSEIEnabled() const { return m_dependentRAPIndicationSEIEnabled; } void setFramePackingArrangementSEIEnabled(bool b) { m_framePackingSEIEnabled = b; } bool getFramePackingArrangementSEIEnabled() const { return m_framePackingSEIEnabled; } void setFramePackingArrangementSEIType(int b) { m_framePackingSEIType = b; } @@ -1164,63 +1358,159 @@ public: int 
getFramePackingArrangementSEIQuincunx() { return m_framePackingSEIQuincunx; } void setFramePackingArrangementSEIInterpretation(int b) { m_framePackingSEIInterpretation = b; } int getFramePackingArrangementSEIInterpretation() { return m_framePackingSEIInterpretation; } - void setSegmentedRectFramePackingArrangementSEIEnabled(bool b) { m_segmentedRectFramePackingSEIEnabled = b; } - bool getSegmentedRectFramePackingArrangementSEIEnabled() const { return m_segmentedRectFramePackingSEIEnabled; } - void setSegmentedRectFramePackingArrangementSEICancel(int b) { m_segmentedRectFramePackingSEICancel = b; } - int getSegmentedRectFramePackingArrangementSEICancel() { return m_segmentedRectFramePackingSEICancel; } - void setSegmentedRectFramePackingArrangementSEIType(int b) { m_segmentedRectFramePackingSEIType = b; } - int getSegmentedRectFramePackingArrangementSEIType() { return m_segmentedRectFramePackingSEIType; } - void setSegmentedRectFramePackingArrangementSEIPersistence(int b) { m_segmentedRectFramePackingSEIPersistence = b; } - int getSegmentedRectFramePackingArrangementSEIPersistence() { return m_segmentedRectFramePackingSEIPersistence; } - void setDisplayOrientationSEIAngle(int b) { m_displayOrientationSEIAngle = b; } - int getDisplayOrientationSEIAngle() { return m_displayOrientationSEIAngle; } - void setTemporalLevel0IndexSEIEnabled(bool b) { m_temporalLevel0IndexSEIEnabled = b; } - bool getTemporalLevel0IndexSEIEnabled() const { return m_temporalLevel0IndexSEIEnabled; } - void setGradualDecodingRefreshInfoEnabled(bool b) { m_gradualDecodingRefreshInfoEnabled = b; } - bool getGradualDecodingRefreshInfoEnabled() const { return m_gradualDecodingRefreshInfoEnabled; } - void setNoDisplaySEITLayer(int b) { m_noDisplaySEITLayer = b; } - int getNoDisplaySEITLayer() { return m_noDisplaySEITLayer; } + void setBpDeltasGOPStructure(bool b) { m_bpDeltasGOPStructure = b; } + bool getBpDeltasGOPStructure() const { return m_bpDeltasGOPStructure; } void 
setDecodingUnitInfoSEIEnabled(bool b) { m_decodingUnitInfoSEIEnabled = b; } bool getDecodingUnitInfoSEIEnabled() const { return m_decodingUnitInfoSEIEnabled; } +#if HEVC_SEI void setSOPDescriptionSEIEnabled(bool b) { m_SOPDescriptionSEIEnabled = b; } bool getSOPDescriptionSEIEnabled() const { return m_SOPDescriptionSEIEnabled; } void setScalableNestingSEIEnabled(bool b) { m_scalableNestingSEIEnabled = b; } bool getScalableNestingSEIEnabled() const { return m_scalableNestingSEIEnabled; } void setTMCTSSEIEnabled(bool b) { m_tmctsSEIEnabled = b; } bool getTMCTSSEIEnabled() { return m_tmctsSEIEnabled; } +#endif + + void setErpSEIEnabled(bool b) { m_erpSEIEnabled = b; } + bool getErpSEIEnabled() { return m_erpSEIEnabled; } + void setErpSEICancelFlag(bool b) { m_erpSEICancelFlag = b; } + bool getErpSEICancelFlag() { return m_erpSEICancelFlag; } + void setErpSEIPersistenceFlag(bool b) { m_erpSEIPersistenceFlag = b; } + bool getErpSEIPersistenceFlag() { return m_erpSEIPersistenceFlag; } + void setErpSEIGuardBandFlag(bool b) { m_erpSEIGuardBandFlag = b; } + bool getErpSEIGuardBandFlag() { return m_erpSEIGuardBandFlag; } + void setErpSEIGuardBandType(uint32_t b) { m_erpSEIGuardBandType = b; } + uint32_t getErpSEIGuardBandType() { return m_erpSEIGuardBandType; } + void setErpSEILeftGuardBandWidth(uint32_t b) { m_erpSEILeftGuardBandWidth = b; } + uint32_t getErpSEILeftGuardBandWidth() { return m_erpSEILeftGuardBandWidth; } + void setErpSEIRightGuardBandWidth(uint32_t b) { m_erpSEIRightGuardBandWidth = b; } + uint32_t getErpSEIRightGuardBandWidth() { return m_erpSEIRightGuardBandWidth; } + void setSphereRotationSEIEnabled(bool b) { m_sphereRotationSEIEnabled = b; } + bool getSphereRotationSEIEnabled() { return m_sphereRotationSEIEnabled; } + void setSphereRotationSEICancelFlag(bool b) { m_sphereRotationSEICancelFlag = b; } + bool getSphereRotationSEICancelFlag() { return m_sphereRotationSEICancelFlag; } + void setSphereRotationSEIPersistenceFlag(bool b) { 
m_sphereRotationSEIPersistenceFlag = b; } + bool getSphereRotationSEIPersistenceFlag() { return m_sphereRotationSEIPersistenceFlag; } + void setSphereRotationSEIYaw(int b) { m_sphereRotationSEIYaw = b; } + int getSphereRotationSEIYaw() { return m_sphereRotationSEIYaw; } + void setSphereRotationSEIPitch(int b) { m_sphereRotationSEIPitch = b; } + int getSphereRotationSEIPitch() { return m_sphereRotationSEIPitch; } + void setSphereRotationSEIRoll(int b) { m_sphereRotationSEIRoll = b; } + int getSphereRotationSEIRoll() { return m_sphereRotationSEIRoll; } + void setOmniViewportSEIEnabled(bool b) { m_omniViewportSEIEnabled = b; } + bool getOmniViewportSEIEnabled() { return m_omniViewportSEIEnabled; } + void setOmniViewportSEIId(uint32_t b) { m_omniViewportSEIId = b; } + uint32_t getOmniViewportSEIId() { return m_omniViewportSEIId; } + void setOmniViewportSEICancelFlag(bool b) { m_omniViewportSEICancelFlag = b; } + bool getOmniViewportSEICancelFlag() { return m_omniViewportSEICancelFlag; } + void setOmniViewportSEIPersistenceFlag(bool b) { m_omniViewportSEIPersistenceFlag = b; } + bool getOmniViewportSEIPersistenceFlag() { return m_omniViewportSEIPersistenceFlag; } + void setOmniViewportSEICntMinus1(uint32_t b) { m_omniViewportSEICntMinus1 = b; } + uint32_t getOmniViewportSEICntMinus1() { return m_omniViewportSEICntMinus1; } + void setOmniViewportSEIAzimuthCentre(const std::vector<int>& vi) { m_omniViewportSEIAzimuthCentre = vi; } + int getOmniViewportSEIAzimuthCentre(int idx) { return m_omniViewportSEIAzimuthCentre[idx]; } + void setOmniViewportSEIElevationCentre(const std::vector<int>& vi){ m_omniViewportSEIElevationCentre = vi; } + int getOmniViewportSEIElevationCentre(int idx) { return m_omniViewportSEIElevationCentre[idx]; } + void setOmniViewportSEITiltCentre(const std::vector<int>& vi) { m_omniViewportSEITiltCentre = vi; } + int getOmniViewportSEITiltCentre(int idx) { return m_omniViewportSEITiltCentre[idx]; } + void setOmniViewportSEIHorRange(const 
std::vector<uint32_t>& vi) { m_omniViewportSEIHorRange = vi; } + uint32_t getOmniViewportSEIHorRange(int idx) { return m_omniViewportSEIHorRange[idx]; } + void setOmniViewportSEIVerRange(const std::vector<uint32_t>& vi) { m_omniViewportSEIVerRange = vi; } + uint32_t getOmniViewportSEIVerRange(int idx) { return m_omniViewportSEIVerRange[idx]; } + void setRwpSEIEnabled(bool b) { m_rwpSEIEnabled = b; } + bool getRwpSEIEnabled() { return m_rwpSEIEnabled; } + void setRwpSEIRwpCancelFlag(bool b) { m_rwpSEIRwpCancelFlag = b; } + bool getRwpSEIRwpCancelFlag() { return m_rwpSEIRwpCancelFlag; } + void setRwpSEIRwpPersistenceFlag (bool b) { m_rwpSEIRwpPersistenceFlag = b; } + bool getRwpSEIRwpPersistenceFlag () { return m_rwpSEIRwpPersistenceFlag; } + void setRwpSEIConstituentPictureMatchingFlag (bool b) { m_rwpSEIConstituentPictureMatchingFlag = b; } + bool getRwpSEIConstituentPictureMatchingFlag () { return m_rwpSEIConstituentPictureMatchingFlag; } + void setRwpSEINumPackedRegions (int value) { m_rwpSEINumPackedRegions = value; } + int getRwpSEINumPackedRegions () { return m_rwpSEINumPackedRegions; } + void setRwpSEIProjPictureWidth (int value) { m_rwpSEIProjPictureWidth = value; } + int getRwpSEIProjPictureWidth () { return m_rwpSEIProjPictureWidth; } + void setRwpSEIProjPictureHeight (int value) { m_rwpSEIProjPictureHeight = value; } + int getRwpSEIProjPictureHeight () { return m_rwpSEIProjPictureHeight; } + void setRwpSEIPackedPictureWidth (int value) { m_rwpSEIPackedPictureWidth = value; } + int getRwpSEIPackedPictureWidth () { return m_rwpSEIPackedPictureWidth; } + void setRwpSEIPackedPictureHeight (int value) { m_rwpSEIPackedPictureHeight = value; } + int getRwpSEIPackedPictureHeight () { return m_rwpSEIPackedPictureHeight; } + void setRwpSEIRwpTransformType(const std::vector<uint8_t>& rwpTransformType) { m_rwpSEIRwpTransformType =rwpTransformType; } + uint8_t getRwpSEIRwpTransformType(uint32_t idx) const { return m_rwpSEIRwpTransformType[idx]; } + void 
setRwpSEIRwpGuardBandFlag(const std::vector<bool>& rwpGuardBandFlag) { m_rwpSEIRwpGuardBandFlag = rwpGuardBandFlag; } + bool getRwpSEIRwpGuardBandFlag(uint32_t idx) const { return m_rwpSEIRwpGuardBandFlag[idx]; } + void setRwpSEIProjRegionWidth(const std::vector<uint32_t>& projRegionWidth) { m_rwpSEIProjRegionWidth = projRegionWidth; } + uint32_t getRwpSEIProjRegionWidth(uint32_t idx) const { return m_rwpSEIProjRegionWidth[idx]; } + void setRwpSEIProjRegionHeight(const std::vector<uint32_t>& projRegionHeight) { m_rwpSEIProjRegionHeight = projRegionHeight; } + uint32_t getRwpSEIProjRegionHeight(uint32_t idx) const { return m_rwpSEIProjRegionHeight[idx]; } + void setRwpSEIRwpSEIProjRegionTop(const std::vector<uint32_t>& projRegionTop) { m_rwpSEIRwpSEIProjRegionTop = projRegionTop; } + uint32_t getRwpSEIRwpSEIProjRegionTop(uint32_t idx) const { return m_rwpSEIRwpSEIProjRegionTop[idx]; } + void setRwpSEIProjRegionLeft(const std::vector<uint32_t>& projRegionLeft) { m_rwpSEIProjRegionLeft = projRegionLeft; } + uint32_t getRwpSEIProjRegionLeft(uint32_t idx) const { return m_rwpSEIProjRegionLeft[idx]; } + void setRwpSEIPackedRegionWidth(const std::vector<uint16_t>& packedRegionWidth) { m_rwpSEIPackedRegionWidth = packedRegionWidth; } + uint16_t getRwpSEIPackedRegionWidth(uint32_t idx) const { return m_rwpSEIPackedRegionWidth[idx]; } + void setRwpSEIPackedRegionHeight(const std::vector<uint16_t>& packedRegionHeight) { m_rwpSEIPackedRegionHeight = packedRegionHeight; } + uint16_t getRwpSEIPackedRegionHeight(uint32_t idx) const { return m_rwpSEIPackedRegionHeight[idx]; } + void setRwpSEIPackedRegionTop(const std::vector<uint16_t>& packedRegionTop) { m_rwpSEIPackedRegionTop = packedRegionTop; } + uint16_t getRwpSEIPackedRegionTop(uint32_t idx) const { return m_rwpSEIPackedRegionTop[idx]; } + void setRwpSEIPackedRegionLeft(const std::vector<uint16_t>& packedRegionLeft) { m_rwpSEIPackedRegionLeft = packedRegionLeft; } + uint16_t getRwpSEIPackedRegionLeft(uint32_t idx) const { 
return m_rwpSEIPackedRegionLeft[idx]; } + void setRwpSEIRwpLeftGuardBandWidth(const std::vector<uint8_t>& rwpLeftGuardBandWidth) { m_rwpSEIRwpLeftGuardBandWidth = rwpLeftGuardBandWidth; } + uint8_t getRwpSEIRwpLeftGuardBandWidth(uint32_t idx) const { return m_rwpSEIRwpLeftGuardBandWidth[idx]; } + void setRwpSEIRwpRightGuardBandWidth(const std::vector<uint8_t>& rwpRightGuardBandWidth) { m_rwpSEIRwpRightGuardBandWidth = rwpRightGuardBandWidth; } + uint8_t getRwpSEIRwpRightGuardBandWidth(uint32_t idx) const { return m_rwpSEIRwpRightGuardBandWidth[idx]; } + void setRwpSEIRwpTopGuardBandHeight(const std::vector<uint8_t>& rwpTopGuardBandHeight) { m_rwpSEIRwpTopGuardBandHeight = rwpTopGuardBandHeight; } + uint8_t getRwpSEIRwpTopGuardBandHeight(uint32_t idx) const { return m_rwpSEIRwpTopGuardBandHeight[idx]; } + void setRwpSEIRwpBottomGuardBandHeight(const std::vector<uint8_t>& rwpBottomGuardBandHeight) { m_rwpSEIRwpBottomGuardBandHeight = rwpBottomGuardBandHeight; } + uint8_t getRwpSEIRwpBottomGuardBandHeight(uint32_t idx) const { return m_rwpSEIRwpBottomGuardBandHeight[idx]; } + void setRwpSEIRwpGuardBandNotUsedForPredFlag(const std::vector<bool>& rwpGuardBandNotUsedForPredFlag) { m_rwpSEIRwpGuardBandNotUsedForPredFlag = rwpGuardBandNotUsedForPredFlag; } + bool getRwpSEIRwpGuardBandNotUsedForPredFlag(uint32_t idx) const { return m_rwpSEIRwpGuardBandNotUsedForPredFlag[idx]; } + void setRwpSEIRwpGuardBandType(const std::vector<uint8_t>& rwpGuardBandType) { m_rwpSEIRwpGuardBandType = rwpGuardBandType; } + uint8_t getRwpSEIRwpGuardBandType(uint32_t idx) const { return m_rwpSEIRwpGuardBandType[idx]; } + void setGcmpSEIEnabled(bool b) { m_gcmpSEIEnabled = b; } + bool getGcmpSEIEnabled() { return m_gcmpSEIEnabled; } + void setGcmpSEICancelFlag(bool b) { m_gcmpSEICancelFlag = b; } + bool getGcmpSEICancelFlag() { return m_gcmpSEICancelFlag; } + void setGcmpSEIPersistenceFlag(bool b) { m_gcmpSEIPersistenceFlag = b; } + bool getGcmpSEIPersistenceFlag() { return 
m_gcmpSEIPersistenceFlag; } + void setGcmpSEIPackingType(uint8_t u) { m_gcmpSEIPackingType = u; } + uint8_t getGcmpSEIPackingType() { return m_gcmpSEIPackingType; } + void setGcmpSEIMappingFunctionType(uint8_t u) { m_gcmpSEIMappingFunctionType = u; } + uint8_t getGcmpSEIMappingFunctionType() { return m_gcmpSEIMappingFunctionType; } + void setGcmpSEIFaceIndex(const std::vector<uint8_t>& gcmpFaceIndex) { m_gcmpSEIFaceIndex = gcmpFaceIndex; } + uint8_t getGcmpSEIFaceIndex(int idx) const { return m_gcmpSEIFaceIndex[idx]; } + void setGcmpSEIFaceRotation(const std::vector<uint8_t>& gcmpFaceRotation) { m_gcmpSEIFaceRotation = gcmpFaceRotation; } + uint8_t getGcmpSEIFaceRotation(int idx) const { return m_gcmpSEIFaceRotation[idx]; } + void setGcmpSEIFunctionCoeffU(const std::vector<double>& gcmpFunctionCoeffU) { m_gcmpSEIFunctionCoeffU = gcmpFunctionCoeffU; } + double getGcmpSEIFunctionCoeffU(int idx) const { return m_gcmpSEIFunctionCoeffU[idx]; } + void setGcmpSEIFunctionUAffectedByVFlag(const std::vector<bool>& gcmpFunctionUAffectedByVFlag) { m_gcmpSEIFunctionUAffectedByVFlag = gcmpFunctionUAffectedByVFlag; } + bool getGcmpSEIFunctionUAffectedByVFlag(int idx) const { return m_gcmpSEIFunctionUAffectedByVFlag[idx]; } + void setGcmpSEIFunctionCoeffV(const std::vector<double>& gcmpFunctionCoeffV) { m_gcmpSEIFunctionCoeffV = gcmpFunctionCoeffV; } + double getGcmpSEIFunctionCoeffV(int idx) const { return m_gcmpSEIFunctionCoeffV[idx]; } + void setGcmpSEIFunctionVAffectedByUFlag(const std::vector<bool>& gcmpFunctionVAffectedByUFlag) { m_gcmpSEIFunctionVAffectedByUFlag = gcmpFunctionVAffectedByUFlag; } + bool getGcmpSEIFunctionVAffectedByUFlag(int idx) const { return m_gcmpSEIFunctionVAffectedByUFlag[idx]; } + void setGcmpSEIGuardBandFlag(bool b) { m_gcmpSEIGuardBandFlag = b; } + bool getGcmpSEIGuardBandFlag() { return m_gcmpSEIGuardBandFlag; } + void setGcmpSEIGuardBandBoundaryType(bool b) { m_gcmpSEIGuardBandBoundaryType = b; } + bool getGcmpSEIGuardBandBoundaryType() { return 
m_gcmpSEIGuardBandBoundaryType; } + void setGcmpSEIGuardBandSamplesMinus1( uint8_t u ) { m_gcmpSEIGuardBandSamplesMinus1 = u; } + uint8_t getGcmpSEIGuardBandSamplesMinus1() { return m_gcmpSEIGuardBandSamplesMinus1; } + bool getSubpicureLevelInfoSEIEnabled() const { return m_subpicureLevelInfoSEIEnabled; } + void setSubpicureLevelInfoSEIEnabled(bool val) { m_subpicureLevelInfoSEIEnabled = val; } + bool getSampleAspectRatioInfoSEIEnabled() const { return m_sampleAspectRatioInfoSEIEnabled; } + void setSampleAspectRatioInfoSEIEnabled(const bool val) { m_sampleAspectRatioInfoSEIEnabled = val; } + bool getSariCancelFlag() const { return m_sariCancelFlag; } + void setSariCancelFlag(const bool val) { m_sariCancelFlag = val; } + bool getSariPersistenceFlag() const { return m_sariPersistenceFlag; } + void setSariPersistenceFlag(const bool val) { m_sariPersistenceFlag = val; } + int getSariAspectRatioIdc() const { return m_sariAspectRatioIdc; } + void setSariAspectRatioIdc(const int val) { m_sariAspectRatioIdc = val; } + int getSariSarWidth() const { return m_sariSarWidth; } + void setSariSarWidth(const int val) { m_sariSarWidth = val; } + int getSariSarHeight() const { return m_sariSarHeight; } + void setSariSarHeight(const int val) { m_sariSarHeight = val; } void setMCTSEncConstraint(bool b) { m_MCTSEncConstraint = b; } bool getMCTSEncConstraint() { return m_MCTSEncConstraint; } - void setTimeCodeSEIEnabled(bool b) { m_timeCodeSEIEnabled = b; } - bool getTimeCodeSEIEnabled() { return m_timeCodeSEIEnabled; } - void setNumberOfTimeSets(int value) { m_timeCodeSEINumTs = value; } - int getNumberOfTimesets() { return m_timeCodeSEINumTs; } - void setTimeSet(SEITimeSet element, int index) { m_timeSetArray[index] = element; } - SEITimeSet &getTimeSet(int index) { return m_timeSetArray[index]; } - const SEITimeSet &getTimeSet(int index) const { return m_timeSetArray[index]; } - void setKneeSEIEnabled(int b) { m_kneeSEIEnabled = b; } - bool getKneeSEIEnabled() { return 
m_kneeSEIEnabled; } - void setKneeSEIId(int b) { m_kneeSEIId = b; } - int getKneeSEIId() { return m_kneeSEIId; } - void setKneeSEICancelFlag(bool b) { m_kneeSEICancelFlag=b; } - bool getKneeSEICancelFlag() { return m_kneeSEICancelFlag; } - void setKneeSEIPersistenceFlag(bool b) { m_kneeSEIPersistenceFlag = b; } - bool getKneeSEIPersistenceFlag() { return m_kneeSEIPersistenceFlag; } - void setKneeSEIInputDrange(int b) { m_kneeSEIInputDrange = b; } - int getKneeSEIInputDrange() { return m_kneeSEIInputDrange; } - void setKneeSEIInputDispLuminance(int b) { m_kneeSEIInputDispLuminance = b; } - int getKneeSEIInputDispLuminance() { return m_kneeSEIInputDispLuminance; } - void setKneeSEIOutputDrange(int b) { m_kneeSEIOutputDrange = b; } - int getKneeSEIOutputDrange() { return m_kneeSEIOutputDrange; } - void setKneeSEIOutputDispLuminance(int b) { m_kneeSEIOutputDispLuminance = b; } - int getKneeSEIOutputDispLuminance() { return m_kneeSEIOutputDispLuminance; } - void setKneeSEINumKneePointsMinus1(int b) { m_kneeSEINumKneePointsMinus1 = b; } - int getKneeSEINumKneePointsMinus1() { return m_kneeSEINumKneePointsMinus1; } - void setKneeSEIInputKneePoint(int *p) { m_kneeSEIInputKneePoint = p; } - int* getKneeSEIInputKneePoint() { return m_kneeSEIInputKneePoint; } - void setKneeSEIOutputKneePoint(int *p) { m_kneeSEIOutputKneePoint = p; } - int* getKneeSEIOutputKneePoint() { return m_kneeSEIOutputKneePoint; } - void setColourRemapInfoSEIFileRoot( const std::string &s ) { m_colourRemapSEIFileRoot = s; } - const std::string &getColourRemapInfoSEIFileRoot() const { return m_colourRemapSEIFileRoot; } void setMasteringDisplaySEI(const SEIMasteringDisplay &src) { m_masteringDisplay = src; } #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI void setSEIAlternativeTransferCharacteristicsSEIEnable( bool b) { m_alternativeTransferCharacteristicsSEIEnabled = b; } @@ -1228,40 +1518,115 @@ public: void setSEIPreferredTransferCharacteristics(uint8_t v) { m_preferredTransferCharacteristics = v; } 
uint8_t getSEIPreferredTransferCharacteristics() const { return m_preferredTransferCharacteristics; } #endif - void setSEIGreenMetadataInfoSEIEnable( bool b) { m_greenMetadataInfoSEIEnabled = b; } - bool getSEIGreenMetadataInfoSEIEnable( ) const { return m_greenMetadataInfoSEIEnabled; } - void setSEIGreenMetadataType(uint8_t v) { m_greenMetadataType = v; } - uint8_t getSEIGreenMetadataType() const { return m_greenMetadataType; } - void setSEIXSDMetricType(uint8_t v) { m_xsdMetricType = v; } - uint8_t getSEIXSDMetricType() const { return m_xsdMetricType; } - const SEIMasteringDisplay &getMasteringDisplaySEI() const { return m_masteringDisplay; } + // film grain SEI + void setFilmGrainCharactersticsSEIEnabled (bool b) { m_fgcSEIEnabled = b; } + bool getFilmGrainCharactersticsSEIEnabled() { return m_fgcSEIEnabled; } + void setFilmGrainCharactersticsSEICancelFlag(bool b) { m_fgcSEICancelFlag = b; } + bool getFilmGrainCharactersticsSEICancelFlag() { return m_fgcSEICancelFlag; } + void setFilmGrainCharactersticsSEIPersistenceFlag(bool b) { m_fgcSEIPersistenceFlag = b; } + bool getFilmGrainCharactersticsSEIPersistenceFlag() { return m_fgcSEIPersistenceFlag; } + void setFilmGrainCharactersticsSEIModelID(uint8_t v ) { m_fgcSEIModelID = v; } + uint8_t getFilmGrainCharactersticsSEIModelID() { return m_fgcSEIModelID; } + void setFilmGrainCharactersticsSEISepColourDescPresent(bool b) { m_fgcSEISepColourDescPresentFlag = b; } + bool getFilmGrainCharactersticsSEISepColourDescPresent() { return m_fgcSEISepColourDescPresentFlag; } + void setFilmGrainCharactersticsSEIBlendingModeID(uint8_t v ) { m_fgcSEIBlendingModeID = v; } + uint8_t getFilmGrainCharactersticsSEIBlendingModeID() { return m_fgcSEIBlendingModeID; } + void setFilmGrainCharactersticsSEILog2ScaleFactor(uint8_t v ) { m_fgcSEILog2ScaleFactor = v; } + uint8_t getFilmGrainCharactersticsSEILog2ScaleFactor() { return m_fgcSEILog2ScaleFactor; } + void setFGCSEICompModelPresent(bool b, int index) { 
m_fgcSEICompModelPresent[index] = b; } + bool getFGCSEICompModelPresent(int index) { return m_fgcSEICompModelPresent[index]; } + // cll SEI + void setCLLSEIEnabled(bool b) { m_cllSEIEnabled = b; } + bool getCLLSEIEnabled() { return m_cllSEIEnabled; } + void setCLLSEIMaxContentLightLevel (uint16_t v) { m_cllSEIMaxContentLevel = v; } + uint16_t getCLLSEIMaxContentLightLevel() { return m_cllSEIMaxContentLevel; } + void setCLLSEIMaxPicAvgLightLevel(uint16_t v) { m_cllSEIMaxPicAvgLevel = v; } + uint16_t getCLLSEIMaxPicAvgLightLevel() { return m_cllSEIMaxPicAvgLevel; } + // ave SEI + void setAmbientViewingEnvironmentSEIEnabled (bool b) { m_aveSEIEnabled = b; } + bool getAmbientViewingEnvironmentSEIEnabled () { return m_aveSEIEnabled; } + void setAmbientViewingEnvironmentSEIIlluminance( uint32_t v ) { m_aveSEIAmbientIlluminance = v; } + uint32_t getAmbientViewingEnvironmentSEIIlluminance() { return m_aveSEIAmbientIlluminance; } + void setAmbientViewingEnvironmentSEIAmbientLightX( uint16_t v ) { m_aveSEIAmbientLightX = v; } + uint16_t getAmbientViewingEnvironmentSEIAmbientLightX() { return m_aveSEIAmbientLightX; } + void setAmbientViewingEnvironmentSEIAmbientLightY( uint16_t v ) { m_aveSEIAmbientLightY = v; } + uint16_t getAmbientViewingEnvironmentSEIAmbientLightY() { return m_aveSEIAmbientLightY; } + // ccv SEI + void setCcvSEIEnabled(bool b) { m_ccvSEIEnabled = b; } + bool getCcvSEIEnabled() { return m_ccvSEIEnabled; } + void setCcvSEICancelFlag(bool b) { m_ccvSEICancelFlag = b; } + bool getCcvSEICancelFlag() { return m_ccvSEICancelFlag; } + void setCcvSEIPersistenceFlag(bool b) { m_ccvSEIPersistenceFlag = b; } + bool getCcvSEIPersistenceFlag() { return m_ccvSEIPersistenceFlag; } + void setCcvSEIPrimariesPresentFlag(bool b) { m_ccvSEIPrimariesPresentFlag = b; } + bool getCcvSEIPrimariesPresentFlag() { return m_ccvSEIPrimariesPresentFlag; } + void setCcvSEIMinLuminanceValuePresentFlag(bool b) { m_ccvSEIMinLuminanceValuePresentFlag = b; } + bool 
getCcvSEIMinLuminanceValuePresentFlag() { return m_ccvSEIMinLuminanceValuePresentFlag; } + void setCcvSEIMaxLuminanceValuePresentFlag(bool b) { m_ccvSEIMaxLuminanceValuePresentFlag = b; } + bool getCcvSEIMaxLuminanceValuePresentFlag() { return m_ccvSEIMaxLuminanceValuePresentFlag; } + void setCcvSEIAvgLuminanceValuePresentFlag(bool b) { m_ccvSEIAvgLuminanceValuePresentFlag = b; } + bool getCcvSEIAvgLuminanceValuePresentFlag() { return m_ccvSEIAvgLuminanceValuePresentFlag; } + void setCcvSEIPrimariesX(double dValue, int index) { m_ccvSEIPrimariesX[index] = dValue; } + double getCcvSEIPrimariesX(int index) { return m_ccvSEIPrimariesX[index]; } + void setCcvSEIPrimariesY(double dValue, int index) { m_ccvSEIPrimariesY[index] = dValue; } + double getCcvSEIPrimariesY(int index) { return m_ccvSEIPrimariesY[index]; } + void setCcvSEIMinLuminanceValue (double dValue) { m_ccvSEIMinLuminanceValue = dValue; } + double getCcvSEIMinLuminanceValue () { return m_ccvSEIMinLuminanceValue; } + void setCcvSEIMaxLuminanceValue (double dValue) { m_ccvSEIMaxLuminanceValue = dValue; } + double getCcvSEIMaxLuminanceValue () { return m_ccvSEIMaxLuminanceValue; } + void setCcvSEIAvgLuminanceValue (double dValue) { m_ccvSEIAvgLuminanceValue = dValue; } + double getCcvSEIAvgLuminanceValue () { return m_ccvSEIAvgLuminanceValue; } void setUseWP ( bool b ) { m_useWeightedPred = b; } void setWPBiPred ( bool b ) { m_useWeightedBiPred = b; } bool getUseWP () { return m_useWeightedPred; } bool getWPBiPred () { return m_useWeightedBiPred; } - void setLog2ParallelMergeLevelMinus2 ( uint32_t u ) { m_log2ParallelMergeLevelMinus2 = u; } - uint32_t getLog2ParallelMergeLevelMinus2 () { return m_log2ParallelMergeLevelMinus2; } void setMaxNumMergeCand ( uint32_t u ) { m_maxNumMergeCand = u; } uint32_t getMaxNumMergeCand () { return m_maxNumMergeCand; } void setMaxNumAffineMergeCand ( uint32_t u ) { m_maxNumAffineMergeCand = u; } uint32_t getMaxNumAffineMergeCand () { return m_maxNumAffineMergeCand; } -#if 
HEVC_USE_SCALING_LISTS + void setMaxNumTriangleCand ( uint32_t u ) { m_maxNumTriangleCand = u; } + uint32_t getMaxNumTriangleCand () { return m_maxNumTriangleCand; } + void setMaxNumIBCMergeCand ( uint32_t u ) { m_maxNumIBCMergeCand = u; } + uint32_t getMaxNumIBCMergeCand () { return m_maxNumIBCMergeCand; } void setUseScalingListId ( ScalingListMode u ) { m_useScalingListId = u; } ScalingListMode getUseScalingListId () { return m_useScalingListId; } void setScalingListFileName ( const std::string &s ) { m_scalingListFileName = s; } const std::string& getScalingListFileName () const { return m_scalingListFileName; } -#endif + void setSliceLevelRpl ( bool b ) { m_sliceLevelRpl = b; } + bool getSliceLevelRpl () { return m_sliceLevelRpl; } + void setSliceLevelDblk ( bool b ) { m_sliceLevelDblk = b; } + bool getSliceLevelDblk () { return m_sliceLevelDblk; } + void setSliceLevelSao ( bool b ) { m_sliceLevelSao = b; } + bool getSliceLevelSao () { return m_sliceLevelSao; } + void setSliceLevelAlf ( bool b ) { m_sliceLevelAlf = b; } + bool getSliceLevelAlf () { return m_sliceLevelAlf; } + void setDisableScalingMatrixForLfnstBlks(bool u) { m_disableScalingMatrixForLfnstBlks = u; } + bool getDisableScalingMatrixForLfnstBlks() const { return m_disableScalingMatrixForLfnstBlks; } void setTMVPModeId ( int u ) { m_TMVPModeId = u; } int getTMVPModeId () { return m_TMVPModeId; } + void setConstantSliceHeaderParamsEnabledFlag ( bool u ) { m_constantSliceHeaderParamsEnabledFlag = u; } + bool getConstantSliceHeaderParamsEnabledFlag () { return m_constantSliceHeaderParamsEnabledFlag; } + void setPPSDepQuantEnabledIdc ( int u ) { m_PPSDepQuantEnabledIdc = u; } + int getPPSDepQuantEnabledIdc () { return m_PPSDepQuantEnabledIdc; } + void setPPSRefPicListSPSIdc0 ( int u ) { m_PPSRefPicListSPSIdc0 = u; } + int getPPSRefPicListSPSIdc0 () { return m_PPSRefPicListSPSIdc0; } + void setPPSRefPicListSPSIdc1 ( int u ) { m_PPSRefPicListSPSIdc1 = u; } + int getPPSRefPicListSPSIdc1 () { return 
m_PPSRefPicListSPSIdc1; } + void setPPSMvdL1ZeroIdc ( int u ) { m_PPSMvdL1ZeroIdc = u; } + int getPPSMvdL1ZeroIdc () { return m_PPSMvdL1ZeroIdc; } + void setPPSCollocatedFromL0Idc ( int u ) { m_PPSCollocatedFromL0Idc = u; } + int getPPSCollocatedFromL0Idc () { return m_PPSCollocatedFromL0Idc; } + void setPPSSixMinusMaxNumMergeCandPlus1 ( uint32_t u ) { m_PPSSixMinusMaxNumMergeCandPlus1 = u; } + uint32_t getPPSSixMinusMaxNumMergeCandPlus1 () { return m_PPSSixMinusMaxNumMergeCandPlus1; } + void setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 ( uint32_t u ) { m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = u; } + uint32_t getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 () { return m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; } WeightedPredictionMethod getWeightedPredictionMethod() const { return m_weightedPredictionMethod; } void setWeightedPredictionMethod( WeightedPredictionMethod m ) { m_weightedPredictionMethod = m; } void setDepQuantEnabledFlag( bool b ) { m_DepQuantEnabledFlag = b; } bool getDepQuantEnabledFlag() { return m_DepQuantEnabledFlag; } -#if HEVC_USE_SIGN_HIDING void setSignDataHidingEnabledFlag( bool b ) { m_SignDataHidingEnabledFlag = b; } bool getSignDataHidingEnabledFlag() { return m_SignDataHidingEnabledFlag; } -#endif bool getUseRateCtrl () const { return m_RCEnableRateControl; } void setUseRateCtrl ( bool b ) { m_RCEnableRateControl = b; } int getTargetBitrate () { return m_RCTargetBitrate; } @@ -1284,33 +1649,31 @@ public: double getInitialCpbFullness () { return m_RCInitialCpbFullness; } void setInitialCpbFullness (double f) { m_RCInitialCpbFullness = f; } #endif - bool getTransquantBypassEnabledFlag() { return m_TransquantBypassEnabledFlag; } - void setTransquantBypassEnabledFlag(bool flag) { m_TransquantBypassEnabledFlag = flag; } - bool getCUTransquantBypassFlagForceValue() const { return m_CUTransquantBypassFlagForce; } - void setCUTransquantBypassFlagForceValue(bool flag) { m_CUTransquantBypassFlagForce = flag; } CostMode 
getCostMode( ) const { return m_costMode; } void setCostMode(CostMode m ) { m_costMode = m; } -#if HEVC_VPS void setVPS(VPS *p) { m_cVPS = *p; } VPS * getVPS() { return &m_cVPS; } -#endif + void setDPS(DPS *p) { m_dps = *p; } + DPS* getDPS() { return &m_dps; } void setUseRecalculateQPAccordingToLambda (bool b) { m_recalculateQPAccordingToLambda = b; } bool getUseRecalculateQPAccordingToLambda () { return m_recalculateQPAccordingToLambda; } -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - void setUseStrongIntraSmoothing ( bool b ) { m_useStrongIntraSmoothing = b; } - bool getUseStrongIntraSmoothing () { return m_useStrongIntraSmoothing; } - -#endif void setEfficientFieldIRAPEnabled( bool b ) { m_bEfficientFieldIRAPEnabled = b; } bool getEfficientFieldIRAPEnabled( ) const { return m_bEfficientFieldIRAPEnabled; } void setHarmonizeGopFirstFieldCoupleEnabled( bool b ) { m_bHarmonizeGopFirstFieldCoupleEnabled = b; } bool getHarmonizeGopFirstFieldCoupleEnabled( ) const { return m_bHarmonizeGopFirstFieldCoupleEnabled; } +#if HEVC_SEI void setActiveParameterSetsSEIEnabled ( int b ) { m_activeParameterSetsSEIEnabled = b; } int getActiveParameterSetsSEIEnabled () { return m_activeParameterSetsSEIEnabled; } +#endif + + bool getDecodingParameterSetEnabled() { return m_decodingParameterSetEnabled; } + void setDecodingParameterSetEnabled(bool i) { m_decodingParameterSetEnabled = i; } + bool getHrdParametersPresentFlag() { return m_hrdParametersPresentFlag; } + void setHrdParametersPresentFlag(bool i) { m_hrdParametersPresentFlag = i; } bool getVuiParametersPresentFlag() { return m_vuiParametersPresentFlag; } void setVuiParametersPresentFlag(bool i) { m_vuiParametersPresentFlag = i; } bool getAspectRatioInfoPresentFlag() { return m_aspectRatioInfoPresentFlag; } @@ -1321,16 +1684,6 @@ public: void setSarWidth(int i) { m_sarWidth = i; } int getSarHeight() { return m_sarHeight; } void setSarHeight(int i) { m_sarHeight = i; } - bool getOverscanInfoPresentFlag() { 
return m_overscanInfoPresentFlag; } - void setOverscanInfoPresentFlag(bool i) { m_overscanInfoPresentFlag = i; } - bool getOverscanAppropriateFlag() { return m_overscanAppropriateFlag; } - void setOverscanAppropriateFlag(bool i) { m_overscanAppropriateFlag = i; } - bool getVideoSignalTypePresentFlag() { return m_videoSignalTypePresentFlag; } - void setVideoSignalTypePresentFlag(bool i) { m_videoSignalTypePresentFlag = i; } - int getVideoFormat() { return m_videoFormat; } - void setVideoFormat(int i) { m_videoFormat = i; } - bool getVideoFullRangeFlag() { return m_videoFullRangeFlag; } - void setVideoFullRangeFlag(bool i) { m_videoFullRangeFlag = i; } bool getColourDescriptionPresentFlag() { return m_colourDescriptionPresentFlag; } void setColourDescriptionPresentFlag(bool i) { m_colourDescriptionPresentFlag = i; } int getColourPrimaries() { return m_colourPrimaries; } @@ -1345,34 +1698,14 @@ public: void setChromaSampleLocTypeTopField(int i) { m_chromaSampleLocTypeTopField = i; } int getChromaSampleLocTypeBottomField() { return m_chromaSampleLocTypeBottomField; } void setChromaSampleLocTypeBottomField(int i) { m_chromaSampleLocTypeBottomField = i; } - bool getNeutralChromaIndicationFlag() { return m_neutralChromaIndicationFlag; } - void setNeutralChromaIndicationFlag(bool i) { m_neutralChromaIndicationFlag = i; } - Window &getDefaultDisplayWindow() { return m_defaultDisplayWindow; } - void setDefaultDisplayWindow (int offsetLeft, int offsetRight, int offsetTop, int offsetBottom ) { m_defaultDisplayWindow.setWindow (offsetLeft, offsetRight, offsetTop, offsetBottom); } - bool getFrameFieldInfoPresentFlag() { return m_frameFieldInfoPresentFlag; } - void setFrameFieldInfoPresentFlag(bool i) { m_frameFieldInfoPresentFlag = i; } - bool getPocProportionalToTimingFlag() { return m_pocProportionalToTimingFlag; } - void setPocProportionalToTimingFlag(bool x) { m_pocProportionalToTimingFlag = x; } - int getNumTicksPocDiffOneMinus1() { return m_numTicksPocDiffOneMinus1; } - 
void setNumTicksPocDiffOneMinus1(int x) { m_numTicksPocDiffOneMinus1 = x; } - bool getBitstreamRestrictionFlag() { return m_bitstreamRestrictionFlag; } - void setBitstreamRestrictionFlag(bool i) { m_bitstreamRestrictionFlag = i; } -#if HEVC_TILES_WPP - bool getTilesFixedStructureFlag() { return m_tilesFixedStructureFlag; } - void setTilesFixedStructureFlag(bool i) { m_tilesFixedStructureFlag = i; } -#endif - bool getMotionVectorsOverPicBoundariesFlag() { return m_motionVectorsOverPicBoundariesFlag; } - void setMotionVectorsOverPicBoundariesFlag(bool i) { m_motionVectorsOverPicBoundariesFlag = i; } - int getMinSpatialSegmentationIdc() { return m_minSpatialSegmentationIdc; } - void setMinSpatialSegmentationIdc(int i) { m_minSpatialSegmentationIdc = i; } - int getMaxBytesPerPicDenom() { return m_maxBytesPerPicDenom; } - void setMaxBytesPerPicDenom(int i) { m_maxBytesPerPicDenom = i; } - int getMaxBitsPerMinCuDenom() { return m_maxBitsPerMinCuDenom; } - void setMaxBitsPerMinCuDenom(int i) { m_maxBitsPerMinCuDenom = i; } - int getLog2MaxMvLengthHorizontal() { return m_log2MaxMvLengthHorizontal; } - void setLog2MaxMvLengthHorizontal(int i) { m_log2MaxMvLengthHorizontal = i; } - int getLog2MaxMvLengthVertical() { return m_log2MaxMvLengthVertical; } - void setLog2MaxMvLengthVertical(int i) { m_log2MaxMvLengthVertical = i; } + int getChromaSampleLocType() { return m_chromaSampleLocType; } + void setChromaSampleLocType(int i) { m_chromaSampleLocType = i; } + bool getOverscanInfoPresentFlag() { return m_overscanInfoPresentFlag; } + void setOverscanInfoPresentFlag(bool i) { m_overscanInfoPresentFlag = i; } + bool getOverscanAppropriateFlag() { return m_overscanAppropriateFlag; } + void setOverscanAppropriateFlag(bool i) { m_overscanAppropriateFlag = i; } + bool getVideoFullRangeFlag() { return m_videoFullRangeFlag; } + void setVideoFullRangeFlag(bool i) { m_videoFullRangeFlag = i; } bool getProgressiveSourceFlag() const { return m_progressiveSourceFlag; } void 
setProgressiveSourceFlag(bool b) { m_progressiveSourceFlag = b; } @@ -1386,27 +1719,19 @@ public: bool getFrameOnlyConstraintFlag() const { return m_frameOnlyConstraintFlag; } void setFrameOnlyConstraintFlag(bool b) { m_frameOnlyConstraintFlag = b; } - uint32_t getBitDepthConstraintValue() const { return m_bitDepthConstraintValue; } - void setBitDepthConstraintValue(uint32_t v) { m_bitDepthConstraintValue=v; } - - ChromaFormat getChromaFormatConstraintValue() const { return m_chromaFormatConstraintValue; } - void setChromaFormatConstraintValue(ChromaFormat v) { m_chromaFormatConstraintValue=v; } bool getIntraConstraintFlag() const { return m_intraConstraintFlag; } void setIntraConstraintFlag(bool b) { m_intraConstraintFlag=b; } - bool getOnePictureOnlyConstraintFlag() const { return m_onePictureOnlyConstraintFlag; } - void setOnePictureOnlyConstraintFlag(bool b) { m_onePictureOnlyConstraintFlag=b; } - - bool getLowerBitRateConstraintFlag() const { return m_lowerBitRateConstraintFlag; } - void setLowerBitRateConstraintFlag(bool b) { m_lowerBitRateConstraintFlag=b; } +#if HEVC_SEI bool getChromaResamplingFilterHintEnabled() { return m_chromaResamplingFilterHintEnabled;} void setChromaResamplingFilterHintEnabled(bool i) { m_chromaResamplingFilterHintEnabled = i;} int getChromaResamplingHorFilterIdc() { return m_chromaResamplingHorFilterIdc;} void setChromaResamplingHorFilterIdc(int i) { m_chromaResamplingHorFilterIdc = i;} int getChromaResamplingVerFilterIdc() { return m_chromaResamplingVerFilterIdc;} void setChromaResamplingVerFilterIdc(int i) { m_chromaResamplingVerFilterIdc = i;} +#endif void setSummaryOutFilename(const std::string &s) { m_summaryOutFilename = s; } const std::string& getSummaryOutFilename() const { return m_summaryOutFilename; } @@ -1443,16 +1768,45 @@ public: void setForceSingleSplitThread( bool b ) { m_forceSingleSplitThread = b; } int getForceSingleSplitThread() const { return m_forceSingleSplitThread; } #endif -#if ENABLE_WPP_PARALLELISM - void 
setNumWppThreads( int n ) { m_numWppThreads = n; } - int getNumWppThreads() const { return m_numWppThreads; } - void setNumWppExtraLines( int n ) { m_numWppExtraLines = n; } - int getNumWppExtraLines() const { return m_numWppExtraLines; } - void setEnsureWppBitEqual( bool b) { m_ensureWppBitEqual = b; } - bool getEnsureWppBitEqual() const { return m_ensureWppBitEqual; } + void setUseALF( bool b ) { m_alf = b; } + bool getUseALF() const { return m_alf; } + +#if JVET_O0756_CALCULATE_HDRMETRICS + void setWhitePointDeltaE( uint32_t index, double value ) { m_whitePointDeltaE[ index ] = value; } + double getWhitePointDeltaE( uint32_t index ) const { return m_whitePointDeltaE[ index ]; } + void setMaxSampleValue(double value) { m_maxSampleValue = value;} + double getMaxSampleValue() const { return m_maxSampleValue;} + void setSampleRange(int value) { m_sampleRange = static_cast<hdrtoolslib::SampleRange>(value);} + hdrtoolslib::SampleRange getSampleRange() const { return m_sampleRange;} + void setColorPrimaries(int value) { m_colorPrimaries = static_cast<hdrtoolslib::ColorPrimaries>(value);} + hdrtoolslib::ColorPrimaries getColorPrimaries() const { return m_colorPrimaries;} + void setEnableTFunctionLUT(bool value) { m_enableTFunctionLUT = value;} + bool getEnableTFunctionLUT() const { return m_enableTFunctionLUT;} + void setChromaLocation(uint32_t index, int value) { m_chromaLocation[ index ] = static_cast<hdrtoolslib::ChromaLocation>(value);} + hdrtoolslib::ChromaLocation getChromaLocation(uint32_t index) const { return m_chromaLocation[index];} + void setChromaUPFilter(int value) { m_chromaUPFilter = value;} + int getChromaUPFilter() const { return m_chromaUPFilter;} + void setCropOffsetLeft(int value) { m_cropOffsetLeft = value;} + int getCropOffsetLeft() const { return m_cropOffsetLeft;} + void setCropOffsetTop(int value) { m_cropOffsetTop = value;} + int getCropOffsetTop() const { return m_cropOffsetTop;} + void setCropOffsetRight(int value) { m_cropOffsetRight = 
value;} + int getCropOffsetRight() const { return m_cropOffsetRight;} + void setCropOffsetBottom(int value) { m_cropOffsetBottom = value;} + int getCropOffsetBottom() const { return m_cropOffsetBottom;} + void setCalculateHdrMetrics(bool value) { m_calculateHdrMetrics = value;} + bool getCalcluateHdrMetrics() const { return m_calculateHdrMetrics;} #endif - void setUseALF( bool b ) { m_alf = b; } - bool getUseALF() const { return m_alf; } + + void setScalingRatio( double hor, double ver ) { m_scalingRatioHor = hor, m_scalingRatioVer = ver; } + void setRPREnabled( bool b ) { m_rprEnabled = b; } + bool isRPREnabled() const { return m_rprEnabled; } + void setSwitchPocPeriod( int p ) { m_switchPocPeriod = p;} + void setUpscaledOutput( int b ) { m_upscaledOutput = b; } + int getUpscaledOutput() const { return m_upscaledOutput; } + + void setNumRefLayers( int* numRefLayers ) { std::memcpy( m_numRefLayers, numRefLayers, sizeof( m_numRefLayers ) ); } + int getNumRefLayers( int layerIdx ) const { return m_numRefLayers[layerIdx]; } }; //! \} diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 51f0e938634470972ecb2abdd9c31613eb26202e..c5775ca715aa414737912446f599ac95f7df8359 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -52,10 +52,6 @@ #include <stdio.h> #include <cmath> #include <algorithm> -#if ENABLE_WPP_PARALLELISM -#include <mutex> -extern std::recursive_mutex g_cache_mutex; -#endif @@ -63,9 +59,7 @@ extern std::recursive_mutex g_cache_mutex; //! 
\{ // ==================================================================================================================== -// Constructor / destructor / create / destroy -// ==================================================================================================================== -const TriangleMotionInfo EncCu::m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS] = +EncCu::EncCu() : m_triangleModeTest { TriangleMotionInfo( 0, 1, 0 ), TriangleMotionInfo( 1, 0, 1 ), TriangleMotionInfo( 1, 0, 2 ), TriangleMotionInfo( 0, 0, 1 ), TriangleMotionInfo( 0, 2, 0 ), TriangleMotionInfo( 1, 0, 3 ), TriangleMotionInfo( 1, 0, 4 ), TriangleMotionInfo( 1, 1, 0 ), TriangleMotionInfo( 0, 3, 0 ), TriangleMotionInfo( 0, 4, 0 ), @@ -75,7 +69,8 @@ const TriangleMotionInfo EncCu::m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS] = TriangleMotionInfo( 1, 3, 4 ), TriangleMotionInfo( 1, 4, 0 ), TriangleMotionInfo( 1, 3, 1 ), TriangleMotionInfo( 1, 2, 3 ), TriangleMotionInfo( 1, 4, 1 ), TriangleMotionInfo( 0, 4, 1 ), TriangleMotionInfo( 0, 2, 3 ), TriangleMotionInfo( 1, 4, 2 ), TriangleMotionInfo( 0, 3, 2 ), TriangleMotionInfo( 1, 4, 3 ), TriangleMotionInfo( 0, 3, 1 ), TriangleMotionInfo( 0, 2, 4 ), TriangleMotionInfo( 1, 2, 4 ), TriangleMotionInfo( 0, 4, 2 ), TriangleMotionInfo( 0, 3, 4 ), -}; +} +{} void EncCu::create( EncCfg* encCfg ) { @@ -87,11 +82,15 @@ void EncCu::create( EncCfg* encCfg ) unsigned numHeights = gp_sizeIdxInfo->numHeights(); m_pTempCS = new CodingStructure** [numWidths]; m_pBestCS = new CodingStructure** [numWidths]; + m_pTempCS2 = new CodingStructure** [numWidths]; + m_pBestCS2 = new CodingStructure** [numWidths]; for( unsigned w = 0; w < numWidths; w++ ) { m_pTempCS[w] = new CodingStructure* [numHeights]; m_pBestCS[w] = new CodingStructure* [numHeights]; + m_pTempCS2[w] = new CodingStructure* [numHeights]; + m_pBestCS2[w] = new CodingStructure* [numHeights]; for( unsigned h = 0; h < numHeights; h++ ) { @@ -103,13 +102,21 @@ void EncCu::create( EncCfg* encCfg ) 
m_pTempCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); m_pBestCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); - m_pTempCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false ); - m_pBestCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false ); + m_pTempCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode()); + m_pBestCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode()); + + m_pTempCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); + m_pBestCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); + + m_pTempCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode()); + m_pBestCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode()); } else { m_pTempCS[w][h] = nullptr; m_pBestCS[w][h] = nullptr; + m_pTempCS2[w][h] = nullptr; + m_pBestCS2[w][h] = nullptr; } } } @@ -129,17 +136,38 @@ void EncCu::create( EncCfg* encCfg ) for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++) { m_acRealMergeBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); + m_acMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight)); } - for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_UNI_CANDS; ui++ ) + const unsigned maxNumTriangleCand = encCfg->getMaxNumTriangleCand(); + for (unsigned i = 0; i < maxNumTriangleCand; i++) { - for( unsigned uj = 0; uj < TRIANGLE_MAX_NUM_UNI_CANDS; uj++ ) + for (unsigned j = 0; j < maxNumTriangleCand; j++) { - if(ui == uj) + if (i == j) continue; - uint8_t idxBits0 = ui + (ui == TRIANGLE_MAX_NUM_UNI_CANDS - 1 ? 0 : 1); - uint8_t candIdx1Enc = uj - (uj > ui ? 1 : 0); - uint8_t idxBits1 = candIdx1Enc + (candIdx1Enc == TRIANGLE_MAX_NUM_UNI_CANDS - 2 ? 
0 : 1); - m_triangleIdxBins[1][ui][uj] = m_triangleIdxBins[0][ui][uj] = 1 + idxBits0 + idxBits1; + uint8_t idxBits0 = i + (i == maxNumTriangleCand - 1 ? 0 : 1); + uint8_t candIdx1Enc = j - (j > i ? 1 : 0); + uint8_t idxBits1 = candIdx1Enc + (candIdx1Enc == maxNumTriangleCand - 2 ? 0 : 1); + m_triangleIdxBins[1][i][j] = m_triangleIdxBins[0][i][j] = 1 + idxBits0 + idxBits1; + } + } + if (maxNumTriangleCand != 5) + { + // update the table + int index = 0; + for (unsigned i = 0; i < maxNumTriangleCand; i++) + { + for (unsigned j = 0; j < maxNumTriangleCand; j++) + { + if (i == j) + continue; + for (unsigned dir = 0; dir < 2; dir++, index++) + { + m_triangleModeTest[index].m_splitDir = dir; + m_triangleModeTest[index].m_candIdx0 = i; + m_triangleModeTest[index].m_candIdx1 = j; + } + } } } for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_CANDS; ui++ ) @@ -166,14 +194,24 @@ void EncCu::destroy() delete m_pBestCS[w][h]; delete m_pTempCS[w][h]; + + if( m_pBestCS2[w][h] ) m_pBestCS2[w][h]->destroy(); + if( m_pTempCS2[w][h] ) m_pTempCS2[w][h]->destroy(); + + delete m_pBestCS2[w][h]; + delete m_pTempCS2[w][h]; } delete[] m_pTempCS[w]; delete[] m_pBestCS[w]; + delete[] m_pTempCS2[w]; + delete[] m_pBestCS2[w]; } delete[] m_pBestCS; m_pBestCS = nullptr; delete[] m_pTempCS; m_pTempCS = nullptr; + delete[] m_pBestCS2; m_pBestCS2 = nullptr; + delete[] m_pTempCS2; m_pTempCS2 = nullptr; #if REUSE_CU_RESULTS if (m_tmpStorageLCU) @@ -197,6 +235,7 @@ void EncCu::destroy() for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++) { m_acRealMergeBuffer[ui].destroy(); + m_acMergeTmpBuffer[ui].destroy(); } for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_CANDS; ui++ ) { @@ -226,31 +265,20 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) m_CtxCache = pcEncLib->getCtxCache( PARL_PARAM0( tId ) ); m_pcRateCtrl = pcEncLib->getRateCtrl(); m_pcSliceEncoder = pcEncLib->getSliceEncoder(); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM 
m_pcEncLib = pcEncLib; m_dataId = tId; #endif m_pcLoopFilter = pcEncLib->getLoopFilter(); - m_shareState = NO_SHARE; - m_pcInterSearch->setShareState(0); - setShareStateDec(0); - - m_shareBndPosX = -1; - m_shareBndPosY = -1; - m_shareBndSizeW = 0; - m_shareBndSizeH = 0; DecCu::init( m_pcTrQuant, m_pcIntraSearch, m_pcInterSearch ); m_modeCtrl->init( m_pcEncCfg, m_pcRateCtrl, m_pcRdCost ); m_pcInterSearch->setModeCtrl( m_modeCtrl ); + m_modeCtrl->setInterSearch(m_pcInterSearch); m_pcIntraSearch->setModeCtrl( m_modeCtrl ); - if ( ( m_pcEncCfg->getIBCHashSearch() && m_pcEncCfg->getIBCMode() ) || m_pcEncCfg->getAllowDisFracMMVD() ) - { - m_ibcHashMap.init(m_pcEncCfg->getSourceWidth(), m_pcEncCfg->getSourceHeight()); - } } // ==================================================================================================================== @@ -260,7 +288,9 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) ) void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] ) { m_modeCtrl->initCTUEncoding( *cs.slice ); + cs.treeType = TREE_D; + cs.slice->m_mapPltCost.clear(); #if ENABLE_SPLIT_PARALLELISM if( m_pcEncCfg->getNumSplitThreads() > 1 ) { @@ -295,8 +325,8 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); } #endif // init the partitioning manager - Partitioner *partitioner = PartitionerFactory::get( *cs.slice ); - partitioner->initCtu( area, CH_L, *cs.slice ); + QTBTPartitioner partitioner; + partitioner.initCtu(area, CH_L, *cs.slice); if (m_pcEncCfg->getIBCMode()) { if (area.lx() == 0 && area.ly() == 0) @@ -327,34 +357,36 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign CodingStructure *tempCS = m_pTempCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )]; 
CodingStructure *bestCS = m_pBestCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )]; - cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false ); - cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false ); + cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false); + cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false); tempCS->currQP[CH_L] = bestCS->currQP[CH_L] = tempCS->baseQP = bestCS->baseQP = currQP[CH_L]; tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L]; - xCompressCU( tempCS, bestCS, *partitioner ); - + xCompressCU(tempCS, bestCS, partitioner); + cs.slice->m_mapPltCost.clear(); // all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1; - cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); + cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType), copyUnsplitCTUSignals, + false, false, copyUnsplitCTUSignals); if (CS::isDualITree (cs) && isChromaEnabled (cs.pcv->chrFormat)) { m_CABACEstimator->getCtx() = m_CurrCtx->start; - partitioner->initCtu( area, CH_C, *cs.slice ); + partitioner.initCtu(area, CH_C, *cs.slice); - cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false ); - cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false ); + cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false); + cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false); tempCS->currQP[CH_C] = bestCS->currQP[CH_C] = tempCS->baseQP = bestCS->baseQP = currQP[CH_C]; tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C]; - 
xCompressCU( tempCS, bestCS, *partitioner ); + xCompressCU(tempCS, bestCS, partitioner); const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1; - cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); + cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType), + copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals); } if (m_pcEncCfg->getUseRateCtrl()) @@ -364,14 +396,7 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign // reset context states and uninit context pointer m_CABACEstimator->getCtx() = m_CurrCtx->start; m_CurrCtx = 0; - delete partitioner; -#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM - if( m_pcEncCfg->getNumSplitThreads() > 1 && m_pcEncCfg->getNumWppThreads() > 1 ) - { - cs.picture->finishCtuPart( area ); - } -#endif // Ensure that a coding was found // Selected mode's RD-cost must be not MAX_DOUBLE. 
@@ -517,16 +542,6 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, if( m_modeCtrl->useModeResult( encTestMode, tempCS, partitioner ) ) { - if( tempCS->cus.size() == 1 ) - { - // if tempCS is not a split-mode - CodingUnit &cu = *tempCS->cus.front(); - - if( CU::isLosslessCoded( cu ) && !cu.ipcm ) - { - xFillPCMBuffer( cu ); - } - } std::swap( tempCS, bestCS ); // store temp best CI for next CU coding @@ -542,17 +557,9 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, } -void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) +void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed ) { - if (m_shareState == NO_SHARE) - { - tempCS->sharedBndPos = tempCS->area.Y().lumaPos(); - tempCS->sharedBndSize.width = tempCS->area.lwidth(); - tempCS->sharedBndSize.height = tempCS->area.lheight(); - bestCS->sharedBndPos = bestCS->area.Y().lumaPos(); - bestCS->sharedBndSize.width = bestCS->area.lwidth(); - bestCS->sharedBndSize.height = bestCS->area.lheight(); - } + CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!"); #if ENABLE_SPLIT_PARALLELISM CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" 
); @@ -567,6 +574,40 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } #endif + uint32_t compBegin; + uint32_t numComp; + bool jointPLT = false; + if (partitioner.isSepTree( *tempCS )) + { + if (isLuma(partitioner.chType)) + { + compBegin = COMPONENT_Y; + numComp = 1; + } + else + { + compBegin = COMPONENT_Cb; + numComp = 2; + } + } + else + { + compBegin = COMPONENT_Y; + numComp = 3; + jointPLT = true; + } + SplitSeries splitmode = -1; + uint8_t bestLastPLTSize[MAX_NUM_CHANNEL_TYPE]; + Pel bestLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT for + uint8_t curLastPLTSize[MAX_NUM_CHANNEL_TYPE]; + Pel curLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT if no partition + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + bestLastPLTSize[comID] = 0; + curLastPLTSize[comID] = tempCS->prevPLT.curPLTSize[comID]; + memcpy(curLastPLT[i], tempCS->prevPLT.curPLT[i], tempCS->prevPLT.curPLTSize[comID] * sizeof(Pel)); + } Slice& slice = *tempCS->slice; const PPS &pps = *tempCS->pps; @@ -574,15 +615,16 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par const uint32_t uiLPelX = tempCS->area.Y().lumaPos().x; const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y; + const ModeType modeTypeParent = partitioner.modeType; + const TreeType treeTypeParent = partitioner.treeType; + const ChannelType chTypeParent = partitioner.chType; const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture ); - tempCS->chType = partitioner.chType; - bestCS->chType = partitioner.chType; m_modeCtrl->initCULevel( partitioner, *tempCS ); if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) ) { auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl ); - int maxSLSize 
= sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxSbtSize() : MTS_INTER_MAX_CU_SIZE; + int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE; slsSbt->resetSaveloadSbt( maxSLSize ); #if ENABLE_SPLIT_PARALLELISM CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." ); @@ -606,8 +648,8 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par { // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode int lgMinCuSize = sps.getLog2MinCodingBlockSize() + - std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( pps.getPpsRangeExtension().getCuChromaQpOffsetSubdiv()/2 ) ); - m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1 ); + std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( slice.getCuChromaQpOffsetSubdiv()/2 ) ); + m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getChromaQpOffsetListLen() + 1 ); } if( !m_modeCtrl->anyMode() ) @@ -623,14 +665,45 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() ); - int startShareThisLevel = 0; m_pcInterSearch->resetSavedAffineMotion(); + double bestIntPelCost = MAX_DOUBLE; + + if (tempCS->slice->getSPS()->getUseColorTrans()) + { + tempCS->tmpColorSpaceCost = MAX_DOUBLE; + bestCS->tmpColorSpaceCost = MAX_DOUBLE; + tempCS->firstColorSpaceSelected = true; + bestCS->firstColorSpaceSelected = true; + } + + if (tempCS->slice->getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS)) + { + tempCS->firstColorSpaceTestOnly = false; + bestCS->firstColorSpaceTestOnly = false; + tempCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE; + 
tempCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE; + bestCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE; + bestCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE; + + if (tempCS->bestParent && tempCS->bestParent->firstColorSpaceTestOnly) + { + tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true; + } + } + do { + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + tempCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID]; + memcpy(tempCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel)); + } EncTestMode currTestMode = m_modeCtrl->currTestMode(); + currTestMode.maxCostAllowed = maxCostAllowed; - if (pps.getUseDQP() && CS::isDualITree(*tempCS) && isChroma(partitioner.chType)) + if (pps.getUseDQP() && partitioner.isSepTree(*tempCS) && isChroma( partitioner.chType )) { const Position chromaCentral(tempCS->area.Cb().chromaPos().offset(tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1)); const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, tempCS->area.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, tempCS->area.chromaFormat)); @@ -660,7 +733,11 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par #endif if (currTestMode.qp >= 0) { - updateLambda (&slice, currTestMode.qp, CS::isDualITree (*tempCS) || (partitioner.currDepth == 0)); + updateLambda (&slice, currTestMode.qp, + #if WCG_EXT && ER_CHROMA_QP_WCG_PPS + m_pcEncCfg->getWCGChromaQPControl().isEnabled(), + #endif + CS::isDualITree (*tempCS) || (partitioner.currDepth == 0)); } } #endif @@ -669,9 +746,13 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par { if( ( currTestMode.opts & ETO_IMV ) != 0 ) { - tempCS->bestCS = bestCS; - xCheckRDCostInterIMV( tempCS, bestCS, partitioner, currTestMode ); - tempCS->bestCS = nullptr; + const bool 
skipAltHpelIF = ( int( ( currTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ) == 4 ) && ( bestIntPelCost > 1.25 * bestCS->cost ); + if (!skipAltHpelIF) + { + tempCS->bestCS = bestCS; + xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode, bestIntPelCost); + tempCS->bestCS = nullptr; + } } else { @@ -708,11 +789,40 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } else if( currTestMode.type == ETM_INTRA ) { - xCheckRDCostIntra( tempCS, bestCS, partitioner, currTestMode ); + if (slice.getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS)) + { + bool skipSecColorSpace = false; + skipSecColorSpace = xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? true : false)); + + if (!skipSecColorSpace && !tempCS->firstColorSpaceTestOnly) + { + xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? false : true)); + } + + if (!tempCS->firstColorSpaceTestOnly) + { + if (tempCS->tmpColorSpaceIntraCost[0] != MAX_DOUBLE && tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE) + { + double skipCostRatio = m_pcEncCfg->getRGBFormatFlag() ? 
1.1 : 1.0; + if (tempCS->tmpColorSpaceIntraCost[1] > (skipCostRatio*tempCS->tmpColorSpaceIntraCost[0])) + { + tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true; + } + } + } + else + { + CHECK(tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE, "the RD test of the second color space should be skipped"); + } + } + else + { + xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, false); + } } - else if( currTestMode.type == ETM_IPCM ) + else if (currTestMode.type == ETM_PALETTE) { - xCheckIntraPCM( tempCS, bestCS, partitioner, currTestMode ); + xCheckPLT( tempCS, bestCS, partitioner, currTestMode ); } else if (currTestMode.type == ETM_IBC) { @@ -724,8 +834,78 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } else if( isModeSplit( currTestMode ) ) { + if (bestCS->cus.size() != 0) + { + splitmode = bestCS->cus[0]->splitSeries; + } + assert( partitioner.modeType == tempCS->modeType ); + int signalModeConsVal = tempCS->signalModeCons( getPartSplit( currTestMode ), partitioner, modeTypeParent ); + int numRoundRdo = signalModeConsVal == LDT_MODE_TYPE_SIGNAL ? 2 : 1; + bool skipInterPass = false; + for( int i = 0; i < numRoundRdo; i++ ) + { + //change cons modes + if( signalModeConsVal == LDT_MODE_TYPE_SIGNAL ) + { + CHECK( numRoundRdo != 2, "numRoundRdo shall be 2 - [LDT_MODE_TYPE_SIGNAL]" ); + tempCS->modeType = partitioner.modeType = (i == 0) ? 
MODE_TYPE_INTER : MODE_TYPE_INTRA; + } + else if( signalModeConsVal == LDT_MODE_TYPE_INFER ) + { + CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INFER]" ); + tempCS->modeType = partitioner.modeType = MODE_TYPE_INTRA; + } + else if( signalModeConsVal == LDT_MODE_TYPE_INHERIT ) + { + CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INHERIT]" ); + tempCS->modeType = partitioner.modeType = modeTypeParent; + } - xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode ); + //for lite intra encoding fast algorithm, set the status to save inter coding info + if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType == MODE_TYPE_INTER ) + { + m_pcIntraSearch->setSaveCuCostInSCIPU( true ); + m_pcIntraSearch->setNumCuInSCIPU( 0 ); + } + else if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType != MODE_TYPE_INTER ) + { + m_pcIntraSearch->setSaveCuCostInSCIPU( false ); + if( tempCS->modeType == MODE_TYPE_ALL ) + { + m_pcIntraSearch->setNumCuInSCIPU( 0 ); + } + } + + xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode, modeTypeParent, skipInterPass ); + //recover cons modes + tempCS->modeType = partitioner.modeType = modeTypeParent; + tempCS->treeType = partitioner.treeType = treeTypeParent; + partitioner.chType = chTypeParent; + if( modeTypeParent == MODE_TYPE_ALL ) + { + m_pcIntraSearch->setSaveCuCostInSCIPU( false ); + if( numRoundRdo == 2 && tempCS->modeType == MODE_TYPE_INTRA ) + { + m_pcIntraSearch->initCuAreaCostInSCIPU(); + } + } + if( skipInterPass ) + { + break; + } + } + if (splitmode != bestCS->cus[0]->splitSeries) + { + splitmode = bestCS->cus[0]->splitSeries; + const CodingUnit& cu = *bestCS->cus.front(); + cu.cs->prevPLT = bestCS->prevPLT; + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? 
COMPONENT_Cb : COMPONENT_Y); + bestLastPLTSize[comID] = bestCS->cus[0]->cs->prevPLT.curPLTSize[comID]; + memcpy(bestLastPLT[i], bestCS->cus[0]->cs->prevPLT.curPLT[i], bestCS->cus[0]->cs->prevPLT.curPLTSize[comID] * sizeof(Pel)); + } + } } else { @@ -733,12 +913,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } } while( m_modeCtrl->nextMode( *tempCS, partitioner ) ); - if(startShareThisLevel == 1) - { - m_shareState = NO_SHARE; - m_pcInterSearch->setShareState(m_shareState); - setShareStateDec(m_shareState); - } ////////////////////////////////////////////////////////////////////////// // Finishing CU @@ -753,30 +927,47 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } #endif + if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE ) + { + //although some coding modes were planned to be tried in RDO, no coding mode actually finished encoding due to early termination + //thus tempCS->cost and bestCS->cost are both MAX_DOUBLE; in this case, skip the following process for normal case + m_modeCtrl->finishCULevel( partitioner ); + return; + } + // set context states m_CABACEstimator->getCtx() = m_CurrCtx->best; // QP from last processed CU for further processing + //copy the qp of the last non-chroma CU + int numCUInThisNode = (int)bestCS->cus.size(); + if( numCUInThisNode > 1 && bestCS->cus.back()->chType == CHANNEL_TYPE_CHROMA && !CS::isDualITree( *bestCS ) ) + { + CHECK( bestCS->cus[numCUInThisNode-2]->chType != CHANNEL_TYPE_LUMA, "wrong chType" ); + bestCS->prevQP[partitioner.chType] = bestCS->cus[numCUInThisNode-2]->qp; + } + else + { bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp; + } if ((!slice.isIntra() || slice.getSPS()->getIBCFlag()) - && bestCS->chType == CHANNEL_TYPE_LUMA + && partitioner.chType == CHANNEL_TYPE_LUMA && bestCS->cus.size() == 1 && (bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC) && bestCS->area.Y() == 
(*bestCS->cus.back()).Y() ) { const CodingUnit& cu = *bestCS->cus.front(); - const PredictionUnit& pu = *cu.firstPU; - if (!cu.affine && !cu.triangle) - { - MotionInfo mi = pu.getMotionInfo(); - mi.GBiIdx = (mi.interDir == 3) ? cu.GBiIdx : GBI_DEFAULT; - cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi); - } + bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16); + CU::saveMotionInHMVP( cu, isIbcSmallBlk ); } bestCS->picture->getPredBuf(currCsArea).copyFrom(bestCS->getPredBuf(currCsArea)); bestCS->picture->getRecoBuf( currCsArea ).copyFrom( bestCS->getRecoBuf( currCsArea ) ); m_modeCtrl->finishCULevel( partitioner ); + if( m_pcIntraSearch->getSaveCuCostInSCIPU() && bestCS->cus.size() == 1 ) + { + m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost ); + } #if ENABLE_SPLIT_PARALLELISM if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 ) @@ -785,94 +976,77 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } #endif - // Assert if Best prediction mode is NONE - // Selected mode's RD-cost must be not MAX_DOUBLE. - CHECK( bestCS->cus.empty() , "No possible encoding found" ); - CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" ); - CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" ); -} - -#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU -void EncCu::updateLambda (Slice* slice, const int dQP, const bool updateRdCostLambda) -{ -#if WCG_EXT && !ENABLE_QPA_SUB_CTU - int NumberBFrames = ( m_pcEncCfg->getGOPSize() - 1 ); - int SHIFT_QP = 12; - double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(slice->getPic()->fieldPic ? 
NumberBFrames/2 : NumberBFrames) ); - - int bitdepth_luma_qp_scale = 6 - * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 - - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); - double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP; - - double dQPFactor = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_QPFactor; - - if( slice->getSliceType() == I_SLICE ) + if (bestCS->cus.size() == 1) // no partition { - if( m_pcEncCfg->getIntraQpFactor() >= 0.0 /*&& m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_sliceType != I_SLICE*/ ) + CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!"); + if (bestCS->cus[0]->predMode == MODE_PLT) { - dQPFactor = m_pcEncCfg->getIntraQpFactor(); + for (int i = compBegin; i < (compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID]; + memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel)); + } + bestCS->reorderPrevPLT(bestCS->prevPLT, bestCS->cus[0]->curPLTSize, bestCS->cus[0]->curPLT, bestCS->cus[0]->reuseflag, compBegin, numComp, jointPLT); } else { - if( m_pcEncCfg->getLambdaFromQPEnable() ) - { - dQPFactor = 0.57; - } - else + for (int i = compBegin; i<(compBegin + numComp); i++) { - dQPFactor = 0.57*dLambda_scale; + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y); + bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID]; + memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel)); } } } - else if( m_pcEncCfg->getLambdaFromQPEnable() ) + else { - dQPFactor = 0.57*dQPFactor; + for (int i = compBegin; i<(compBegin + numComp); i++) + { + ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? 
COMPONENT_Cb : COMPONENT_Y); + bestCS->prevPLT.curPLTSize[comID] = bestLastPLTSize[comID]; + memcpy(bestCS->prevPLT.curPLT[i], bestLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel)); + } } + const CodingUnit& cu = *bestCS->cus.front(); + cu.cs->prevPLT = bestCS->prevPLT; + // Assert if Best prediction mode is NONE + // Selected mode's RD-cost must be not MAX_DOUBLE. + CHECK( bestCS->cus.empty() , "No possible encoding found" ); + CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" ); + CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" ); +} - double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); - int depth = slice->getDepth(); - - if( !m_pcEncCfg->getLambdaFromQPEnable() && depth>0 ) - { - int qp_temp_slice = slice->getSliceQp() + bitdepth_luma_qp_scale - SHIFT_QP; // avoid lambda over adjustment, use slice_qp here - dLambda *= Clip3( 2.00, 4.00, (qp_temp_slice / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 ) - } - if( !m_pcEncCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE ) +#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU +void EncCu::updateLambda (Slice* slice, const int dQP, + #if WCG_EXT && ER_CHROMA_QP_WCG_PPS + const bool useWCGChromaControl, + #endif + const bool updateRdCostLambda) +{ +#if WCG_EXT && ER_CHROMA_QP_WCG_PPS + if (useWCGChromaControl) { - dLambda *= 0.95; - } + const double lambda = m_pcSliceEncoder->initializeLambda (slice, m_pcSliceEncoder->getGopId(), slice->getSliceQp(), (double)dQP); + const int clippedQP = Clip3 (-slice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, dQP); - const int temporalId = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_temporalId; - const std::vector<double> &intraLambdaModifiers = m_pcEncCfg->getIntraLambdaModifier(); - double lambdaModifier; - if( slice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty()) - { - lambdaModifier = m_pcEncCfg->getLambdaModifier(temporalId); - } - else - { - lambdaModifier = 
intraLambdaModifiers[(temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size() - 1)]; + m_pcSliceEncoder->setUpLambda (slice, lambda, clippedQP); + return; } - dLambda *= lambdaModifier; - - int qpBDoffset = slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA); - int iQP = Clip3(-qpBDoffset, MAX_QP, (int)floor((double)dQP + 0.5)); - m_pcSliceEncoder->setUpLambda(slice, dLambda, iQP); - -#else +#endif int iQP = dQP; const double oldQP = (double)slice->getSliceQpBase(); #if ENABLE_QPA_SUB_CTU const double oldLambda = (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && slice->getPPS()->getUseDQP()) ? slice->getLambdas()[0] : - m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), slice->getDepth(), oldQP, oldQP, iQP); + m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), oldQP, oldQP, iQP); #else - const double oldLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), slice->getDepth(), oldQP, oldQP, iQP); + const double oldLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), oldQP, oldQP, iQP); #endif const double newLambda = oldLambda * pow (2.0, ((double)dQP - oldQP) / 3.0); #if RDOQ_CHROMA_LAMBDA - const double chromaLambda = newLambda / m_pcRdCost->getChromaWeight(); - const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda}; + const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y), + newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb), + newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)}; m_pcTrQuant->setLambdas (lambdaArray); #else m_pcTrQuant->setLambda (newLambda); @@ -880,10 +1054,12 @@ void EncCu::updateLambda (Slice* slice, const int dQP, const bool updateRdCostLa if (updateRdCostLambda) { m_pcRdCost->setLambda (newLambda, slice->getSPS()->getBitDepths()); - } +#if WCG_EXT + m_pcRdCost->saveUnadjustedLambda(); #endif + } } -#endif 
+#endif // SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU #if ENABLE_SPLIT_PARALLELISM //#undef DEBUG_PARALLEL_TIMINGS @@ -901,35 +1077,25 @@ void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bes std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false ); const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ); -#if ENABLE_WPP_PARALLELISM - const int wppTId = picture->scheduler.getWppThreadId(); -#endif const bool doParallel = !m_pcEncCfg->getForceSingleSplitThread(); -#if _MSC_VER && ENABLE_WPP_PARALLELISM -#pragma omp parallel for schedule(dynamic,1) num_threads(NUM_SPLIT_THREADS_IF_MSVC) if(doParallel) -#else omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() ); #pragma omp parallel for schedule(dynamic,1) if(doParallel) -#endif for( int jId = 1; jId <= numJobs; jId++ ) { // thread start -#if ENABLE_WPP_PARALLELISM - picture->scheduler.setWppThreadId( wppTId ); -#endif picture->scheduler.setSplitThreadId(); picture->scheduler.setSplitJobId( jId ); - Partitioner* jobPartitioner = PartitionerFactory::get( *tempCS->slice ); + QTBTPartitioner jobPartitioner; EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) ); auto* jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl ); #if REUSE_CU_RESULTS auto* jobBestCache = dynamic_cast<BestEncInfoCache*>( jobCuEnc->m_modeCtrl ); #endif - jobPartitioner->copyState( partitioner ); - jobCuEnc ->copyState( this, *jobPartitioner, currArea, true ); + jobPartitioner.copyState( partitioner ); + jobCuEnc ->copyState( this, jobPartitioner, currArea, true ); if( jobBlkCache ) { jobBlkCache ->tick(); } #if REUSE_CU_RESULTS @@ -941,9 +1107,7 @@ void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bes jobUsed[jId] = true; - jobCuEnc->xCompressCU( jobTemp, jobBest, *jobPartitioner ); - - delete jobPartitioner; + jobCuEnc->xCompressCU( jobTemp, jobBest, jobPartitioner ); picture->scheduler.setSplitJobId( 
0 ); // thread stop @@ -1044,41 +1208,35 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c m_modeCtrl ->copyState( *other->m_modeCtrl, partitioner.currArea() ); m_pcRdCost ->copyState( *other->m_pcRdCost ); m_pcTrQuant ->copyState( *other->m_pcTrQuant ); - if( m_pcEncCfg->getReshaper() ) + if( m_pcEncCfg->getLmcs() ) { EncReshape *encReshapeThis = dynamic_cast<EncReshape*>( m_pcReshape); EncReshape *encReshapeOther = dynamic_cast<EncReshape*>(other->m_pcReshape); encReshapeThis->copyState( *encReshapeOther ); } - m_shareState = other->m_shareState; - m_shareBndPosX = other->m_shareBndPosX; - m_shareBndPosY = other->m_shareBndPosY; - m_shareBndSizeW = other->m_shareBndSizeW; - m_shareBndSizeH = other->m_shareBndSizeH; - setShareStateDec( other->getShareStateDec() ); - m_pcInterSearch->setShareState( other->m_pcInterSearch->getShareState() ); m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx(); } #endif -void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) +void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass ) { const int qp = encTestMode.qp; const Slice &slice = *tempCS->slice; - const bool bIsLosslessMode = false; // False at this level. Next level down may set it to true. const int oldPrevQp = tempCS->prevQP[partitioner.chType]; const auto oldMotionLut = tempCS->motionLut; #if ENABLE_QPA_SUB_CTU const PPS &pps = *tempCS->pps; const uint32_t currDepth = partitioner.currDepth; #endif + const auto oldPLT = tempCS->prevPLT; const PartSplit split = getPartSplit( encTestMode ); + const ModeType modeTypeChild = partitioner.modeType; CHECK( split == CU_DONT_SPLIT, "No proper split provided!" 
); - tempCS->initStructData( qp, bIsLosslessMode ); + tempCS->initStructData( qp ); m_CABACEstimator->getCtx() = m_CurrCtx->start; @@ -1086,10 +1244,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, const TempCtx ctxStartQt( m_CtxCache, SubCtx( Ctx::SplitQtFlag, m_CABACEstimator->getCtx() ) ); const TempCtx ctxStartHv( m_CtxCache, SubCtx( Ctx::SplitHvFlag, m_CABACEstimator->getCtx() ) ); const TempCtx ctxStart12( m_CtxCache, SubCtx( Ctx::Split12Flag, m_CABACEstimator->getCtx() ) ); - + const TempCtx ctxStartMC( m_CtxCache, SubCtx( Ctx::ModeConsFlag, m_CABACEstimator->getCtx() ) ); m_CABACEstimator->resetBits(); m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner ); + m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild ); const double factor = ( tempCS->currQP[partitioner.chType] > 30 ? 1.1 : 1.075 ); tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); @@ -1101,10 +1260,10 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitQtFlag, ctxStartQt ); m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitHvFlag, ctxStartHv ); m_CABACEstimator->getCtx() = SubCtx( Ctx::Split12Flag, ctxStart12 ); - + m_CABACEstimator->getCtx() = SubCtx( Ctx::ModeConsFlag, ctxStartMC ); if (cost > bestCS->cost + bestCS->costDbOffset #if ENABLE_QPA_SUB_CTU - || (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP() && (pps.getCuQpDeltaSubdiv() > 0) && (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) && + || (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP() && (slice.getCuQpDeltaSubdiv() > 0) && (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) && (currDepth == 0)) // force quad-split or no split at CTU level #endif ) @@ -1113,45 +1272,25 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, return; } - int startShareThisLevel = 0; - const uint32_t uiLPelX = 
tempCS->area.Y().lumaPos().x; - const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y; - - int splitRatio = 1; - CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT - || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type"); - splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 1 : 2; - - bool isOneChildSmall = ((tempCS->area.lwidth())*(tempCS->area.lheight()) >> splitRatio) < MRG_SHARELIST_SHARSIZE; - - if ((((tempCS->area.lwidth())*(tempCS->area.lheight())) > (MRG_SHARELIST_SHARSIZE * 1))) + const bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false; + if( partitioner.treeType != TREE_D ) { - m_shareState = NO_SHARE; + tempCS->treeType = TREE_L; } - - if (m_shareState == NO_SHARE)//init state + else { - if (isOneChildSmall) + if( chromaNotSplit ) { - m_shareState = GEN_ON_SHARED_BOUND;//share start state - startShareThisLevel = 1; + CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" ); + tempCS->treeType = partitioner.treeType = TREE_L; + } + else + { + tempCS->treeType = partitioner.treeType = TREE_D; } - } - if ((m_shareState == GEN_ON_SHARED_BOUND) && (!slice.isIntra() || slice.getSPS()->getIBCFlag())) - { - tempCS->motionLut.lutShare = tempCS->motionLut.lut; - tempCS->motionLut.lutShareIbc = tempCS->motionLut.lutIbc; - m_shareBndPosX = uiLPelX; - m_shareBndPosY = uiTPelY; - m_shareBndSizeW = tempCS->area.lwidth(); - m_shareBndSizeH = tempCS->area.lheight(); - m_shareState = SHARING; } - m_pcInterSearch->setShareState(m_shareState); - setShareStateDec(m_shareState); - partitioner.splitCurrArea( split, *tempCS ); bool qgEnableChildren = partitioner.currQgEnable(); // QG possible at children level @@ -1163,6 +1302,12 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, AffineMVInfo tmpMVInfo; bool isAffMVInfoSaved; m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, isAffMVInfoSaved); + BlkUniMvInfo 
tmpUniMvInfo; + bool isUniMvInfoSaved = false; + if (!tempCS->slice->isIntra()) + { + m_pcInterSearch->savePrevUniMvInfo(tempCS->area.Y(), tmpUniMvInfo, isUniMvInfoSaved); + } do { @@ -1178,15 +1323,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, tempCS->initSubStructure( *tempSubCS, partitioner.chType, subCUArea, false ); tempCS->initSubStructure( *bestSubCS, partitioner.chType, subCUArea, false ); - tempSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x; - tempSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y; - tempSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth(); - tempSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight(); - bestSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x; - bestSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y; - bestSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth(); - bestSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight(); - xCompressCU( tempSubCS, bestSubCS, partitioner ); + tempSubCS->bestParent = bestSubCS->bestParent = bestCS; + double newMaxCostAllowed = isLuma(partitioner.chType) ? 
std::min(encTestMode.maxCostAllowed, bestCS->cost - m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist)) : MAX_DOUBLE; + newMaxCostAllowed = std::max(0.0, newMaxCostAllowed); + xCompressCU(tempSubCS, bestSubCS, partitioner, newMaxCostAllowed); + tempSubCS->bestParent = bestSubCS->bestParent = nullptr; if( bestSubCS->cost == MAX_DOUBLE ) { @@ -1197,6 +1338,10 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, m_CurrCtx--; partitioner.exitCurrSplit(); xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + if( partitioner.chType == CHANNEL_TYPE_LUMA ) + { + tempCS->motionLut = oldMotionLut; + } return; } @@ -1207,35 +1352,161 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, { tempCS->prevQP[partitioner.chType] = bestSubCS->prevQP[partitioner.chType]; } + if( partitioner.isConsInter() ) + { + for( int i = 0; i < bestSubCS->cus.size(); i++ ) + { + CHECK( bestSubCS->cus[i]->predMode != MODE_INTER, "all CUs must be inter mode in an Inter coding region (SCIPU)" ); + } + } + else if( partitioner.isConsIntra() ) + { + for( int i = 0; i < bestSubCS->cus.size(); i++ ) + { + CHECK( bestSubCS->cus[i]->predMode == MODE_INTER, "all CUs must not be inter mode in an Intra coding region (SCIPU)" ); + } + } tempSubCS->releaseIntermediateData(); bestSubCS->releaseIntermediateData(); + if( !tempCS->slice->isIntra() && partitioner.isConsIntra() ) + { + tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); + if( tempCS->cost > bestCS->cost ) + { + tempCS->cost = MAX_DOUBLE; + tempCS->costDbOffset = 0; + tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); + m_CurrCtx--; + partitioner.exitCurrSplit(); + if( partitioner.chType == CHANNEL_TYPE_LUMA ) + { + tempCS->motionLut = oldMotionLut; + } + return; + } + } } } while( partitioner.nextPart( *tempCS ) ); partitioner.exitCurrSplit(); - if (startShareThisLevel == 1) - { - m_shareState = NO_SHARE; - m_pcInterSearch->setShareState(m_shareState); - 
setShareStateDec(m_shareState); - } m_CurrCtx--; + if( chromaNotSplit ) + { + //Note: In local dual tree region, the chroma CU refers to the central luma CU's QP. + //If the luma CU QP shall be predQP (no residual in it and before it in the QG), it must be revised to predQP before encoding the chroma CU + //Otherwise, the chroma CU uses predQP+deltaQP in encoding but is decoded as using predQP, thus causing encoder-decoded mismatch on chroma qp. + if( tempCS->pps->getUseDQP() ) + { + //find parent CS that including all coded CUs in the QG before this node + CodingStructure* qgCS = tempCS; + bool deltaQpCodedBeforeThisNode = false; + if( partitioner.currArea().lumaPos() != partitioner.currQgPos ) + { + int numParentNodeToQgCS = 0; + while( qgCS->area.lumaPos() != partitioner.currQgPos ) + { + CHECK( qgCS->parent == nullptr, "parent of qgCS shall exsit" ); + qgCS = qgCS->parent; + numParentNodeToQgCS++; + } + + //check whether deltaQP has been coded (in luma CU or luma&chroma CU) before this node + CodingStructure* parentCS = tempCS->parent; + for( int i = 0; i < numParentNodeToQgCS; i++ ) + { + //checking each parent + CHECK( parentCS == nullptr, "parentCS shall exsit" ); + for( const auto &cu : parentCS->cus ) + { + if( cu->rootCbf && !isChroma( cu->chType ) ) + { + deltaQpCodedBeforeThisNode = true; + break; + } + } + parentCS = parentCS->parent; + } + } + + //revise luma CU qp before the first luma CU with residual in the SCIPU to predQP + if( !deltaQpCodedBeforeThisNode ) + { + //get pred QP of the QG + const CodingUnit* cuFirst = qgCS->getCU( CHANNEL_TYPE_LUMA ); + CHECK( cuFirst->lumaPos() != partitioner.currQgPos, "First cu of the Qg is wrong" ); + int predQp = CU::predictQP( *cuFirst, qgCS->prevQP[CHANNEL_TYPE_LUMA] ); + + //revise to predQP + int firstCuHasResidual = (int)tempCS->cus.size(); + for( int i = 0; i < tempCS->cus.size(); i++ ) + { + if( tempCS->cus[i]->rootCbf ) + { + firstCuHasResidual = i; + break; + } + } + + for( int i = 0; i < 
firstCuHasResidual; i++ ) + { + tempCS->cus[i]->qp = predQp; + } + } + } + assert( tempCS->treeType == TREE_L ); + uint32_t numCuPuTu[6]; + tempCS->picture->cs->getNumCuPuTuOffset( numCuPuTu ); + tempCS->picture->cs->useSubStructure( *tempCS, partitioner.chType, CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ), false, true, false, false ); + + partitioner.chType = CHANNEL_TYPE_CHROMA; + tempCS->treeType = partitioner.treeType = TREE_C; + + m_CurrCtx++; + + const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() ); + const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() ); + CodingStructure *tempCSChroma = m_pTempCS2[wIdx][hIdx]; + CodingStructure *bestCSChroma = m_pBestCS2[wIdx][hIdx]; + tempCS->initSubStructure( *tempCSChroma, partitioner.chType, partitioner.currArea(), false ); + tempCS->initSubStructure( *bestCSChroma, partitioner.chType, partitioner.currArea(), false ); + tempCS->treeType = TREE_D; + xCompressCU( tempCSChroma, bestCSChroma, partitioner ); + + //attach chromaCS to luma CS and update cost + bool keepResi = KEEP_PRED_AND_RESI_SIGNALS; + //bestCSChroma->treeType = tempCSChroma->treeType = TREE_C; + CHECK( bestCSChroma->treeType != TREE_C || tempCSChroma->treeType != TREE_C, "wrong treeType for chroma CS" ); + tempCS->useSubStructure( *bestCSChroma, partitioner.chType, CS::getArea( *bestCSChroma, partitioner.currArea(), partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, true ); + + //release tmp resource + tempCSChroma->releaseIntermediateData(); + bestCSChroma->releaseIntermediateData(); + //tempCS->picture->cs->releaseIntermediateData(); + tempCS->picture->cs->clearCuPuTuIdxMap( partitioner.currArea(), numCuPuTu[0], numCuPuTu[1], numCuPuTu[2], numCuPuTu + 3 ); + + m_CurrCtx--; + + //recover luma tree status + partitioner.chType = CHANNEL_TYPE_LUMA; + partitioner.treeType = TREE_D; + partitioner.modeType = MODE_TYPE_ALL; + } + // Finally, generate split-signaling bits 
for RD-cost check const PartSplit implicitSplit = partitioner.getImplicitSplit( *tempCS ); { bool enforceQT = implicitSplit == CU_QUAD_SPLIT; -#if HM_QTBT_REPRODUCE_FAST_LCTU_BUG // LARGE CTU bug if( m_pcEncCfg->getUseFastLCTU() ) { unsigned minDepth = 0; - unsigned maxDepth = g_aucLog2[tempCS->sps->getCTUSize()] - g_aucLog2[tempCS->sps->getMinQTSize(slice.getSliceType(), partitioner.chType)]; + unsigned maxDepth = floorLog2(tempCS->sps->getCTUSize()) - floorLog2(tempCS->sps->getMinQTSize(slice.getSliceType(), partitioner.chType)); if( auto ad = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) ) { @@ -1248,14 +1519,14 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, enforceQT = true; } } -#endif if( !enforceQT ) { m_CABACEstimator->resetBits(); m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner ); - + partitioner.modeType = modeTypeParent; + m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild ); tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits } } @@ -1273,60 +1544,52 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, // The exception is each slice / slice-segment must have at least one CTU. 
if (bestCS->cost != MAX_DOUBLE) { -#if HEVC_TILES_WPP - const TileMap& tileMap = *tempCS->picture->tileMap; -#endif -#if HEVC_TILES_WPP || HEVC_DEPENDENT_SLICES - const uint32_t CtuAddr = CU::getCtuAddr( *bestCS->getCU( partitioner.chType ) ); -#endif - const bool isEndOfSlice = slice.getSliceMode() == FIXED_NUMBER_OF_BYTES - && ((slice.getSliceBits() + CS::getEstBits(*bestCS)) > slice.getSliceArgument() << 3) -#if HEVC_TILES_WPP - && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceCurStartCtuTsAddr()) -#endif -#if HEVC_DEPENDENT_SLICES - && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr()); -#else - ; -#endif - -#if HEVC_DEPENDENT_SLICES - const bool isEndOfSliceSegment = slice.getSliceSegmentMode() == FIXED_NUMBER_OF_BYTES - && ((slice.getSliceSegmentBits() + CS::getEstBits(*bestCS)) > slice.getSliceSegmentArgument() << 3) - && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr()); - // Do not need to check slice condition for slice-segment since a slice-segment is a subset of a slice. - if (isEndOfSlice || isEndOfSliceSegment) -#else - if(isEndOfSlice) -#endif - { - bestCS->cost = MAX_DOUBLE; - bestCS->costDbOffset = 0; - } } else { bestCS->costDbOffset = 0; } tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); + if( tempCS->cus.size() > 0 && modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTER ) + { + int areaSizeNoResiCu = 0; + for( int k = 0; k < tempCS->cus.size(); k++ ) + { + areaSizeNoResiCu += (tempCS->cus[k]->rootCbf == false) ? tempCS->cus[k]->lumaSize().area() : 0; + } + if( areaSizeNoResiCu >= (tempCS->area.lumaSize().area() >> 1) ) + { + skipInterPass = true; + } + } // RD check for sub partitioned coding structure. 
xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); if (isAffMVInfoSaved) m_pcInterSearch->addAffMVInfo(tmpMVInfo); + if (!tempCS->slice->isIntra() && isUniMvInfoSaved) + { + m_pcInterSearch->addUniMvInfo(tmpUniMvInfo); + } tempCS->motionLut = oldMotionLut; + tempCS->prevPLT = oldPLT; + tempCS->releaseIntermediateData(); tempCS->prevQP[partitioner.chType] = oldPrevQp; } - -void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) +bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, bool adaptiveColorTrans) { - const PPS &pps = *tempCS->pps; + double bestInterCost = m_modeCtrl->getBestInterCost(); + double costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost(); + bool skipSecondMtsPass = m_modeCtrl->getSkipSecondMTSPass(); + const SPS& sps = *tempCS->sps; + const int maxSizeMTS = MTS_INTRA_MAX_CU_SIZE; + uint8_t considerMtsSecondPass = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 
1 : 0; bool useIntraSubPartitions = false; double maxCostAllowedForChroma = MAX_DOUBLE; @@ -1334,196 +1597,428 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC Distortion interHad = m_modeCtrl->getInterHad(); + double dct2Cost = MAX_DOUBLE; + double bestNonDCT2Cost = MAX_DOUBLE; + double trGrpBestCost [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE }; + double globalBestCost = MAX_DOUBLE; + bool bestSelFlag [ 4 ] = { false, false, false, false }; + bool trGrpCheck [ 4 ] = { true, true, true, true }; + int startMTSIdx [ 4 ] = { 0, 1, 2, 3 }; + int endMTSIdx [ 4 ] = { 0, 1, 2, 3 }; + double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 }; + int bestMtsFlag = 0; + int bestLfnstIdx = 0; + + const int maxLfnstIdx = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ) + || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2; + bool skipOtherLfnst = false; + int startLfnstIdx = 0; + int endLfnstIdx = sps.getUseLFNST() ? maxLfnstIdx : 0; + + int grpNumMax = sps.getUseLFNST() ? 
m_pcEncCfg->getMTSIntraMaxCand() : 1; + m_modeCtrl->setISPWasTested(false); + m_pcIntraSearch->invalidateBestModeCost(); + if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS)) + { + if ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || (!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans)) + { + m_pcIntraSearch->invalidateBestRdModeFirstColorSpace(); + } + } + + bool foundZeroRootCbf = false; + if (sps.getUseColorTrans()) { + CHECK(tempCS->treeType != TREE_D || partitioner.treeType != TREE_D, "localtree should not be applied when adaptive color transform is enabled"); + CHECK(tempCS->modeType != MODE_TYPE_ALL || partitioner.modeType != MODE_TYPE_ALL, "localtree should not be applied when adaptive color transform is enabled"); + CHECK(adaptiveColorTrans && (CS::isDualITree(*tempCS) || partitioner.chType != CHANNEL_TYPE_LUMA), "adaptive color transform cannot be applied to dual-tree"); + } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ ) + { + const uint8_t startMtsFlag = trGrpIdx > 0; + const uint8_t endMtsFlag = sps.getUseLFNST() ? 
considerMtsSecondPass : 0; - CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] ) + { + for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ ) + { + for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ ) + { + if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS)) + { + m_pcIntraSearch->setSavedRdModeIdx(trGrpIdx*(NUM_LFNST_NUM_PER_SET * 2) + lfnstIdx * 2 + mtsFlag); + } + if (mtsFlag > 0 && lfnstIdx > 0) + { + continue; + } + //3) if interHad is 0, only try further modes if some intra mode was already better than inter + if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 ) + { + continue; + } - partitioner.setCUData( cu ); - cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif - cu.skip = false; - cu.mmvdSkip = false; - cu.predMode = MODE_INTRA; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; - cu.qp = encTestMode.qp; - //cu.ipcm = false; - cu.ispMode = NOT_INTRA_SUBPARTITIONS; + tempCS->initStructData( encTestMode.qp ); - CU::addPUs( cu ); + CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); - tempCS->interHad = interHad; + partitioner.setCUData( cu ); + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); + cu.skip = false; + cu.mmvdSkip = false; + cu.predMode = MODE_INTRA; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; + cu.qp = encTestMode.qp; + cu.lfnstIdx = lfnstIdx; + cu.mtsFlag = mtsFlag; + cu.ispMode = NOT_INTRA_SUBPARTITIONS; + cu.colorTransform = adaptiveColorTrans; - m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; + CU::addPUs( cu ); - if( isLuma( partitioner.chType ) ) - { - //the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines - const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost; - m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar ); + tempCS->interHad = interHad; - useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; - if( !CS::isDualITree( *tempCS ) ) - { - tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); - if( useIntraSubPartitions ) - { - //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost - maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? 
bestCS->cost - tempCS->lumaCost : MAX_DOUBLE; - } - } + m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; - if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max() - && tempCS->interHad == 0) - { - interHad = 0; - // JEM assumes only perfect reconstructions can from now on beat the inter mode - m_modeCtrl->enforceInterHad( 0 ); - return; - } + bool validCandRet = false; + if( isLuma( partitioner.chType ) ) + { + //ISP uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary subpartitions + double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost; + if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar) + { + bestCostSoFar = encTestMode.maxCostAllowed; + } + validCandRet = m_pcIntraSearch->estIntraPredLumaQT(cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[trGrpIdx], endMTSIdx[trGrpIdx], (trGrpIdx > 0), !cu.colorTransform ? bestCS : nullptr); + if ((!validCandRet || (cu.ispMode && cu.firstTU->cbf[COMPONENT_Y] == 0))) + { + continue; + } + if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform) + { + m_modeCtrl->setISPMode(cu.ispMode); + m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx); + m_modeCtrl->setMIPFlagISPPass(cu.mipFlag); + m_modeCtrl->setBestISPIntraModeRelCU(cu.ispMode ? 
PU::getFinalIntraMode(*cu.firstPU, CHANNEL_TYPE_LUMA) : UINT8_MAX); + m_modeCtrl->setBestDCT2NonISPCostRelCU(m_modeCtrl->getMtsFirstPassNoIspCost()); + } - if( !CS::isDualITree( *tempCS ) ) - { - cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) ); - cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y)); - } - } + if (sps.getUseColorTrans() && m_pcEncCfg->getRGBFormatFlag() && !CS::isDualITree(*tempCS) && !cu.colorTransform) + { + double curLumaCost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); + if (curLumaCost > bestCS->cost) + { + continue; + } + } - if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) ) - { - TUIntraSubPartitioner subTuPartitioner( partitioner ); - m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma ); - if( useIntraSubPartitions && !cu.ispMode ) - { - //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations - return; - } - } + useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS; + if( !partitioner.isSepTree( *tempCS ) ) + { + tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist ); + if( useIntraSubPartitions ) + { + //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost + maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? 
bestCS->cost - tempCS->lumaCost : MAX_DOUBLE; + } + } + + if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max() + && tempCS->interHad == 0) + { + interHad = 0; + // JEM assumes only perfect reconstructions can from now on beat the inter mode + m_modeCtrl->enforceInterHad( 0 ); + continue; + } - cu.rootCbf = false; + if( !partitioner.isSepTree( *tempCS ) ) + { + if (!cu.colorTransform) + { + cu.cs->picture->getRecoBuf(cu.Y()).copyFrom(cu.cs->getRecoBuf(COMPONENT_Y)); + cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y)); + } + else + { + cu.cs->picture->getRecoBuf(cu).copyFrom(cu.cs->getRecoBuf(cu)); + cu.cs->picture->getPredBuf(cu).copyFrom(cu.cs->getPredBuf(cu)); + } + } + } - for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ ) - { - cu.rootCbf |= cu.firstTU->cbf[t] != 0; - } + if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) && !cu.colorTransform ) + { + TUIntraSubPartitioner subTuPartitioner( partitioner ); + m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma ); + if( useIntraSubPartitions && !cu.ispMode ) + { + //At this point the temp cost is larger than the best cost. 
Therefore, we can already skip the remaining calculations + continue; + } + } - // Get total bits for current mode: encode CU - m_CABACEstimator->resetBits(); + cu.rootCbf = false; - if( pps.getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } + for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ ) + { + cu.rootCbf |= cu.firstTU->cbf[t] != 0; + } - if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) - && cu.Y().valid() - ) - { - m_CABACEstimator->cu_skip_flag ( cu ); - } - m_CABACEstimator->pred_mode ( cu ); - m_CABACEstimator->pcm_data ( cu, partitioner ); - m_CABACEstimator->extend_ref_line( cu ); - m_CABACEstimator->isp_mode ( cu ); - m_CABACEstimator->cu_pred_data ( cu ); + if (!cu.rootCbf) + { + cu.colorTransform = false; + foundZeroRootCbf = true; + } - // Encode Coefficients - CUCtx cuCtx; - cuCtx.isDQPCoded = true; - cuCtx.isChromaQpAdjCoded = true; - m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); + // Get total bits for current mode: encode CU + m_CABACEstimator->resetBits(); - tempCS->fracBits = m_CABACEstimator->getEstFracBits(); - tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); + if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) + && cu.Y().valid() + ) + { + m_CABACEstimator->cu_skip_flag ( cu ); + } + m_CABACEstimator->pred_mode ( cu ); + m_CABACEstimator->adaptive_color_transform(cu); + m_CABACEstimator->cu_pred_data ( cu ); + m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); + if (!CS::isDualITree(*cu.cs) && isLuma(partitioner.chType)) + m_CABACEstimator->bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA)); - const double tmpCostWithoutSplitFlags = tempCS->cost; - xEncodeDontSplit( *tempCS, partitioner ); + // Encode Coefficients + CUCtx cuCtx; + cuCtx.isDQPCoded = true; + cuCtx.isChromaQpAdjCoded = true; + m_CABACEstimator->cu_residual( cu, partitioner, cuCtx ); - xCheckDQP( *tempCS, partitioner ); + 
tempCS->fracBits = m_CABACEstimator->getEstFracBits(); + tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); - if( tempCS->cost < bestCS->cost ) - { - m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags ); - } - xCalDebCost( *tempCS, partitioner ); - tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); + const double tmpCostWithoutSplitFlags = tempCS->cost; + xEncodeDontSplit( *tempCS, partitioner ); + + xCheckDQP( *tempCS, partitioner ); + + // Check if low frequency non-separable transform (LFNST) is too expensive + if( lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode ) + { + bool cbfAtZeroDepth = cu.isSepTree() ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf; + if( cbfAtZeroDepth ) + { + tempCS->cost = MAX_DOUBLE; + } + } + + if( mtsFlag == 0 && lfnstIdx == 0 ) + { + dct2Cost = tempCS->cost; + } + else if (tmpCostWithoutSplitFlags < bestNonDCT2Cost) + { + bestNonDCT2Cost = tmpCostWithoutSplitFlags; + } + + if( tempCS->cost < bestCS->cost ) + { + m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags ); + } + + if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost; + + if( sps.getUseLFNST() && !tempCS->cus.empty() ) + { + skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner ); + } + + xCalDebCost( *tempCS, partitioner ); + tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); #if WCG_EXT - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); #else - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); + DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); #endif - xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS)) + { + int colorSpaceIdx = ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || 
(!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans)) ? 0 : 1; + if (tempCS->cost < tempCS->tmpColorSpaceIntraCost[colorSpaceIdx]) + { + tempCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost; + bestCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost; + } + } + if( !sps.getUseLFNST() ) + { + xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + } + else + { + if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) ) + { + trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost; + bestSelFlag [ trGrpIdx ] = true; + bestMtsFlag = mtsFlag; + bestLfnstIdx = lfnstIdx; + if( bestCS->cus.size() == 1 ) + { + CodingUnit &cu = *bestCS->cus.front(); + if (cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP) + { + if( ( floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].width ) + floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].height ) ) >= 6 ) + { + endLfnstIdx = 0; + } + } + } + } - } //for emtCuFlag -} + //we decide to skip the non-DCT-II transforms and LFNST according to the ISP results + if ((endMtsFlag > 0 || endLfnstIdx > 0) && (cu.ispMode || (bestCS && bestCS->cus[0]->ispMode)) && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP()) + { + double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost(); + double bestIspCost = m_modeCtrl->getIspCost(); + CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" 
); + double threshold = 1.4; -void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) -{ - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + double lfnstThreshold = 1.01 * threshold; + if( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*lfnstThreshold ) + { + endLfnstIdx = lfnstIdx; + } - CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + if ( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*threshold ) + { + skipSecondMtsPass = true; + m_modeCtrl->setSkipSecondMTSPass( true ); + break; + } + } + //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not + if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA ) + { + const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode + if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost ) + { + skipSecondMtsPass = true; + m_modeCtrl->setSkipSecondMTSPass( true ); + break; + } + } + } - partitioner.setCUData( cu ); - cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif - cu.skip = false; + } //for emtCuFlag + if( skipOtherLfnst ) + { + startLfnstIdx = lfnstIdx; + endLfnstIdx = lfnstIdx; + break; + } + } //for lfnstIdx + } //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx]) + + if( sps.getUseLFNST() && trGrpIdx < 3 ) + { + trGrpCheck[ trGrpIdx + 1 ] = false; + + if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass ) + { + double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ]; + trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ]; + } + } + } //trGrpIdx + if(!adaptiveColorTrans) + 
m_modeCtrl->setBestNonDCT2Cost(bestNonDCT2Cost); + return foundZeroRootCbf; +} + + +void EncCu::xCheckPLT(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) +{ + tempCS->initStructData(encTestMode.qp); + CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType); + partitioner.setCUData(cu); + cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); + cu.skip = false; cu.mmvdSkip = false; - cu.predMode = MODE_INTRA; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; - cu.qp = encTestMode.qp; - cu.ipcm = true; + cu.predMode = MODE_PLT; - tempCS->addPU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; + cu.qp = encTestMode.qp; + cu.bdpcmMode = 0; - tempCS->addTU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType ); + tempCS->addPU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType); + tempCS->addTU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType); + // Search + tempCS->dist = 0; + if (cu.isSepTree()) + { + if (isLuma(partitioner.chType)) + { + m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, 1); + } + if (tempCS->area.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA)) + { + m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Cb, 2); + } + } + else + { + m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, 3); + } - m_pcIntraSearch->IPCMSearch(*tempCS, partitioner); m_CABACEstimator->getCtx() = m_CurrCtx->start; - m_CABACEstimator->resetBits(); - - if( tempCS->pps->getTransquantBypassEnabledFlag() ) + if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) + && cu.Y().valid()) { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); + 
m_CABACEstimator->cu_skip_flag(cu); } + m_CABACEstimator->pred_mode(cu); - if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag()) - && cu.Y().valid() - ) + // signaling + CUCtx cuCtx; + cuCtx.isDQPCoded = true; + cuCtx.isChromaQpAdjCoded = true; + if (cu.isSepTree()) { - m_CABACEstimator->cu_skip_flag ( cu ); + if (isLuma(partitioner.chType)) + { + m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, 1, cuCtx); + } + if (tempCS->area.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA)) + { + m_CABACEstimator->cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx); + } + } + else + { + m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, 3, cuCtx); } - m_CABACEstimator->pred_mode ( cu ); - m_CABACEstimator->pcm_data ( cu, partitioner ); - - tempCS->fracBits = m_CABACEstimator->getEstFracBits(); - tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); - - xEncodeDontSplit( *tempCS, partitioner ); + tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist); - xCheckDQP( *tempCS, partitioner ); - - xCalDebCost( *tempCS, partitioner ); + xEncodeDontSplit(*tempCS, partitioner); + xCheckDQP(*tempCS, partitioner); + xCalDebCost(*tempCS, partitioner); tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); + const Area currCuArea = cu.block(getFirstComponentOfChannel(partitioner.chType)); + cu.slice->m_mapPltCost[currCuArea.pos()][currCuArea.size()] = tempCS->cost; #if WCG_EXT - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); + DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda(true)); #else - DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() ); + DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda()); #endif - xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); + xCheckBestMode(tempCS, bestCS, partitioner, encTestMode); } void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx ) @@ -1536,7 +2031,7 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep return; 
} - if (CS::isDualITree(cs) && isChroma(partitioner.chType)) + if (partitioner.isSepTree(cs) && isChroma(partitioner.chType)) { return; } @@ -1554,7 +2049,8 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep bool hasResidual = false; for( const auto &cu : cs.cus ) { - if( cu->rootCbf ) + //not include the chroma CU because chroma CU is decided based on corresponding luma QP and deltaQP is not signaled at chroma CU + if( cu->rootCbf && !isChroma( cu->chType )) { hasResidual = true; break; @@ -1580,7 +2076,8 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep // NOTE: reset QPs for CUs without residuals up to first coded CU for( const auto &cu : cs.cus ) { - if( cu->rootCbf ) + //not include the chroma CU because chroma CU is decided based on corresponding luma QP and deltaQP is not signaled at chroma CU + if( cu->rootCbf && !isChroma( cu->chType )) { break; } @@ -1612,8 +2109,7 @@ void EncCu::xFillPCMBuffer( CodingUnit &cu ) const CPelBuf source = tu.cs->getOrgBuf( compArea ); PelBuf destination = tu.getPcmbuf( compID ); - - if (tu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) + if (tu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) { CompArea tmpArea(COMPONENT_Y, compArea.chromaFormat, Position(0, 0), compArea.size()); PelBuf tempOrgBuf = m_tmpStorageLCU->getBuf(tmpArea); @@ -1630,17 +2126,17 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b { bool isPerfectMatch = false; - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); m_pcInterSearch->resetBufferedUniMotions(); m_pcInterSearch->setAffineModeSelected(false); CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType); partitioner.setCUData(cu); cu.slice = tempCS->slice; + cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos()); cu.skip = false; 
cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs(cu); cu.mmvdSkip = false; @@ -1648,13 +2144,13 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b if (m_pcInterSearch->predInterHashSearch(cu, partitioner, isPerfectMatch)) { - double equGBiCost = MAX_DOUBLE; + double equBcwCost = MAX_DOUBLE; m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0 , 0 - , &equGBiCost + , &equBcwCost ); if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE ) @@ -1662,9 +2158,9 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b xCalDebCost( *bestCS, partitioner ); } } - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); - - if (cu.lwidth() != 64) + tempCS->initStructData(encTestMode.qp); + int minSize = min(cu.lwidth(), cu.lheight()); + if (minSize < 64) { isPerfectMatch = false; } @@ -1677,7 +2173,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& CHECK( slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices" ); - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); MergeCtx mergeCtx; const SPS &sps = *tempCS->sps; @@ -1697,19 +2193,16 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& cu.cs = tempCS; cu.predMode = MODE_INTER; cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); PredictionUnit pu( tempCS->area ); pu.cu = &cu; pu.cs = tempCS; - pu.shareParentPos = tempCS->sharedBndPos; - pu.shareParentSize = tempCS->sharedBndSize; PU::getInterMergeCandidates(pu, mergeCtx , 0 ); 
PU::getInterMMVDMergeCandidates(pu, mergeCtx); + pu.regularMergeFlag = true; } bool candHasNoResidual[MRG_MAX_NUM_CANDS + MMVD_ADD_NUM]; for (uint32_t ui = 0; ui < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM; ui++) @@ -1720,18 +2213,38 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& bool bestIsSkip = false; bool bestIsMMVDSkip = true; PelUnitBuf acMergeBuffer[MRG_MAX_NUM_CANDS]; + PelUnitBuf acMergeTmpBuffer[MRG_MAX_NUM_CANDS]; PelUnitBuf acMergeRealBuffer[MMVD_MRG_MAX_RD_BUF_NUM]; PelUnitBuf * acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM]; PelUnitBuf * singleMergeTempBuffer; int insertPos; unsigned uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM; - static_vector<unsigned, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> RdModeList; + struct ModeInfo + { + uint32_t mergeCand; + bool isRegularMerge; + bool isMMVD; + bool isCIIP; + ModeInfo() : mergeCand(0), isRegularMerge(false), isMMVD(false), isCIIP(false) {} + ModeInfo(const uint32_t mergeCand, const bool isRegularMerge, const bool isMMVD, const bool isCIIP) : + mergeCand(mergeCand), isRegularMerge(isRegularMerge), isMMVD(isMMVD), isCIIP(isCIIP) {} + }; + + static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> RdModeList; bool mrgTempBufSet = false; + const int candNum = MRG_MAX_NUM_CANDS + (tempCS->sps->getUseMMVD() ? 
MMVD_ADD_NUM : 0); - for (unsigned i = 0; i < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM; i++) + for (int i = 0; i < candNum; i++) { - RdModeList.push_back(i); + if (i < mergeCtx.numValidMergeCand) + { + RdModeList.push_back(ModeInfo(i, true, false, false)); + } + else + { + RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false)); + } } const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height)); @@ -1748,9 +2261,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } } - static_vector<unsigned, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM> RdModeList2; // store the Intra mode for Intrainter - RdModeList2.clear(); - bool isIntrainterEnabled = sps.getUseMHIntra(); + bool isIntrainterEnabled = sps.getUseCiip(); if (bestCS->area.lwidth() * bestCS->area.lheight() < 64 || bestCS->area.lwidth() >= MAX_CU_SIZE || bestCS->area.lheight() >= MAX_CU_SIZE) { isIntrainterEnabled = false; @@ -1793,31 +2304,27 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& { RdModeList.clear(); mrgTempBufSet = true; - const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless ); + const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); - const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS); - + const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE; partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.mmvdSkip = false; cu.triangle = false; //cu.affine cu.predMode = MODE_INTER; //cu.LICFlag - cu.transQuantBypass = encTestMode.lossless; - 
cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; //cu.emtFlag is set below PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); DistParam distParam; - const bool bUseHadamard= !encTestMode.lossless; + const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD(); m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) ); @@ -1828,7 +2335,8 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& PU::spanMotionInfo( pu, mergeCtx ); pu.mvRefine = true; distParam.cur = singleMergeTempBuffer->Y(); - m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer); + acMergeTmpBuffer[uiMergeCand] = m_acMergeTmpBuffer[uiMergeCand].getBuf(localUnitArea); + m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true, &(acMergeTmpBuffer[uiMergeCand])); acMergeBuffer[uiMergeCand] = m_acRealMergeBuffer[uiMergeCand].getBuf(localUnitArea); acMergeBuffer[uiMergeCand].copyFrom(*singleMergeTempBuffer); pu.mvRefine = false; @@ -1855,17 +2363,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } Distortion uiSad = distParam.distFunc(distParam); - uint32_t uiBitsCand = uiMergeCand + 1; - if( uiMergeCand == tempCS->slice->getMaxNumMergeCand() - 1 ) - { - uiBitsCand--; - } -#if !JVET_MMVD_OFF_MACRO - uiBitsCand++; // for mmvd_flag -#endif - double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; + m_CABACEstimator->getCtx() = ctxStart; + uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu); + double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra; insertPos = -1; - updateDoubleCandList(uiMergeCand, cost, RdModeList, candCostList, RdModeList2, 
(uint32_t)NUM_LUMA_MODE, uiNumMrgSATDCand, &insertPos); + updateCandList(ModeInfo(uiMergeCand, true, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos); if (insertPos != -1) { if (insertPos == RdModeList.size() - 1) @@ -1886,161 +2388,107 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if (isIntrainterEnabled) { - int numTestIntraMode = 4; // prepare for Intra bits calculation - const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); - const TempCtx ctxStartIntraMode(m_CtxCache, SubCtx(Ctx::MHIntraPredMode, m_CABACEstimator->getCtx())); - - // for Intrainter fast, recored the best intra mode during the first round for mrege 0 - int bestMHIntraMode = -1; - double bestMHIntraCost = MAX_DOUBLE; - - pu.mhIntraFlag = true; + pu.ciipFlag = true; // save the to-be-tested merge candidates - uint32_t MHIntraMergeCand[NUM_MRG_SATD_CAND]; + uint32_t CiipMergeCand[NUM_MRG_SATD_CAND]; for (uint32_t mergeCnt = 0; mergeCnt < std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand); mergeCnt++) { - MHIntraMergeCand[mergeCnt] = RdModeList[mergeCnt]; + CiipMergeCand[mergeCnt] = RdModeList[mergeCnt].mergeCand; } for (uint32_t mergeCnt = 0; mergeCnt < std::min(std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand), 4); mergeCnt++) { - uint32_t mergeCand = MHIntraMergeCand[mergeCnt]; - acMergeBuffer[mergeCand] = m_acRealMergeBuffer[mergeCand].getBuf(localUnitArea); + uint32_t mergeCand = CiipMergeCand[mergeCnt]; + acMergeTmpBuffer[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea); // estimate merge bits - uint32_t bitsCand = mergeCand + 1; - if (mergeCand == pu.cs->slice->getMaxNumMergeCand() - 1) - { - bitsCand--; - } + mergeCtx.setMergeInfo(pu, mergeCand); // first round - for (uint32_t intraCnt = 0; intraCnt < numTestIntraMode; intraCnt++) + pu.intraDir[0] = PLANAR_IDX; + uint32_t intraCnt = 0; + // generate intrainter Y prediction + if (mergeCnt == 0) { - pu.intraDir[0] = 
(intraCnt < 2) ? intraCnt : ((intraCnt == 2) ? HOR_IDX : VER_IDX); - - // fast 2 - if (mergeCnt > 0 && bestMHIntraMode != pu.intraDir[0]) - { - continue; - } - int narrowCase = PU::getNarrowShape(pu.lwidth(), pu.lheight()); - if (narrowCase == 1 && pu.intraDir[0] == HOR_IDX) - { - continue; - } - if (narrowCase == 2 && pu.intraDir[0] == VER_IDX) - { - continue; - } - // generate intrainter Y prediction - if (mergeCnt == 0) - { - bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu); - m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y(), isUseFilter); - m_pcIntraSearch->predIntraAng(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, isUseFilter); - m_pcIntraSearch->switchBuffer(pu, COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt)); - } - pu.cs->getPredBuf(pu).copyFrom(acMergeBuffer[mergeCand]); - if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) - { - pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT()); - } - m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt)); + m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y()); + m_pcIntraSearch->predIntraAng(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu); + m_pcIntraSearch->switchBuffer(pu, COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt)); + } + pu.cs->getPredBuf(pu).copyFrom(acMergeTmpBuffer[mergeCand]); + if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) + { + pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT()); + } + m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt)); - // calculate cost - if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) - { - pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getInvLUT()); - } - 
distParam.cur = pu.cs->getPredBuf(pu).Y(); - Distortion sadValue = distParam.distFunc(distParam); - if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) - { - pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT()); - } - m_CABACEstimator->getCtx() = SubCtx(Ctx::MHIntraPredMode, ctxStartIntraMode); - uint64_t fracModeBits = m_pcIntraSearch->xFracModeBitsIntra(pu, pu.intraDir[0], CHANNEL_TYPE_LUMA); - double cost = (double)sadValue + (double)(bitsCand + 1) * sqrtLambdaForFirstPass + (double)fracModeBits * sqrtLambdaForFirstPassIntra; - insertPos = -1; - updateDoubleCandList(mergeCand + MRG_MAX_NUM_CANDS + MMVD_ADD_NUM, cost, RdModeList, candCostList, RdModeList2, pu.intraDir[0], uiNumMrgSATDCand, &insertPos); - if (insertPos != -1) - { - for (int i = int(RdModeList.size()) - 1; i > insertPos; i--) - { - swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]); - } - swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]); - } - // fast 2 - if (mergeCnt == 0 && cost < bestMHIntraCost) + // calculate cost + if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) + { + pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getInvLUT()); + } + distParam.cur = pu.cs->getPredBuf(pu).Y(); + Distortion sadValue = distParam.distFunc(distParam); + if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) + { + pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT()); + } + m_CABACEstimator->getCtx() = ctxStart; + pu.regularMergeFlag = false; + uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu); + double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra; + insertPos = -1; + updateCandList(ModeInfo(mergeCand, false, false, true), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos); + if (insertPos != -1) + { + for (int i = int(RdModeList.size()) - 1; i > insertPos; i--) { - bestMHIntraMode = pu.intraDir[0]; - bestMHIntraCost = cost; + swap(acMergeTempBuffer[i - 1], 
acMergeTempBuffer[i]); } + swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]); } } - pu.mhIntraFlag = false; - m_CABACEstimator->getCtx() = ctxStart; + pu.ciipFlag = false; } -#if !JVET_MMVD_OFF_MACRO - cu.mmvdSkip = true; - int tempNum = 0; - tempNum = MMVD_ADD_NUM; - for (uint32_t mergeCand = mergeCtx.numValidMergeCand; mergeCand < mergeCtx.numValidMergeCand + tempNum; mergeCand++) + if ( pu.cs->sps->getUseMMVD() ) { - const int mmvdMergeCand = mergeCand - mergeCtx.numValidMergeCand; - int bitsBaseIdx = 0; - int bitsRefineStep = 0; - int bitsDirection = 2; - int bitsCand = 0; - int baseIdx; - int refineStep; - baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM; - refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4; - bitsBaseIdx = baseIdx + 1; - if (baseIdx == MMVD_BASE_MV_NUM - 1) - { - bitsBaseIdx--; - } - - bitsRefineStep = refineStep + 1; - if (refineStep == MMVD_REFINE_STEP - 1) - { - bitsRefineStep--; - } - - bitsCand = bitsBaseIdx + bitsRefineStep + bitsDirection; - bitsCand++; // for mmvd_flag - - mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand); - - PU::spanMotionInfo(pu, mergeCtx); - pu.mvRefine = true; - distParam.cur = singleMergeTempBuffer->Y(); - pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1); - CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set"); - // Don't do chroma MC here - m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false); - pu.mmvdEncOptMode = 0; - pu.mvRefine = false; - Distortion uiSad = distParam.distFunc(distParam); - - - double cost = (double)uiSad + (double)bitsCand * sqrtLambdaForFirstPass; - insertPos = -1; - updateDoubleCandList(mergeCand, cost, RdModeList, candCostList, RdModeList2, (uint32_t)NUM_LUMA_MODE, uiNumMrgSATDCand, &insertPos); - if (insertPos != -1) + cu.mmvdSkip = true; + pu.regularMergeFlag = true; + const int tempNum = (mergeCtx.numValidMergeCand > 1) ? 
MMVD_ADD_NUM : MMVD_ADD_NUM >> 1; + for (int mmvdMergeCand = 0; mmvdMergeCand < tempNum; mmvdMergeCand++) { - for (int i = int(RdModeList.size()) - 1; i > insertPos; i--) + int baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM; + int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4; + if (refineStep >= m_pcEncCfg->getMmvdDisNum()) + continue; + mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand); + + PU::spanMotionInfo(pu, mergeCtx); + pu.mvRefine = true; + distParam.cur = singleMergeTempBuffer->Y(); + pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1); + CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set"); + // Don't do chroma MC here + m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false); + pu.mmvdEncOptMode = 0; + pu.mvRefine = false; + Distortion uiSad = distParam.distFunc(distParam); + + m_CABACEstimator->getCtx() = ctxStart; + uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu); + double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra; + insertPos = -1; + updateCandList(ModeInfo(mmvdMergeCand, false, true, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos); + if (insertPos != -1) { - swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]); + for (int i = int(RdModeList.size()) - 1; i > insertPos; i--) + { + swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]); + } + swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]); } - swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]); } } -#endif // Try to limit number of candidates using SATD-costs for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ ) { @@ -2055,34 +2503,36 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if (isIntrainterEnabled) { - pu.mhIntraFlag = true; + pu.ciipFlag = true; for (uint32_t mergeCnt = 0; mergeCnt < uiNumMrgSATDCand; mergeCnt++) { - if (RdModeList[mergeCnt] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) + if (RdModeList[mergeCnt].isCIIP) { - pu.intraDir[0] = 
RdModeList2[mergeCnt]; + pu.intraDir[0] = PLANAR_IDX; pu.intraDir[1] = DM_CHROMA_IDX; - uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0]; - bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cb, pu, true, pu); - m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb(), isUseFilter); - m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu, isUseFilter); + if (pu.chromaSize().width == 2) + continue; + uint32_t bufIdx = 0; + m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb()); + m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu); m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx)); - isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cr, pu, true, pu); - m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr(), isUseFilter); - m_pcIntraSearch->predIntraAng(COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), pu, isUseFilter); + + m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr()); + m_pcIntraSearch->predIntraAng(COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), pu); m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx)); } } - pu.mhIntraFlag = false; + pu.ciipFlag = false; } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); + m_CABACEstimator->getCtx() = ctxStart; } else { if (bestIsMMVDSkip) { - uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM; + uiNumMrgSATDCand = mergeCtx.numValidMergeCand + ((mergeCtx.numValidMergeCand > 1) ? 
MMVD_ADD_NUM : MMVD_ADD_NUM >> 1); } else { @@ -2092,26 +2542,16 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; uint32_t iteration; - uint32_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0; - if (encTestMode.lossless) - { - iteration = 1; - iterationBegin = 0; - } - else - { - iteration = 2; - } + uint32_t iterationBegin = 0; + iteration = 2; for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass) { for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ ) { - uint32_t uiMergeCand = RdModeList[uiMrgHADIdx]; + uint32_t uiMergeCand = RdModeList[uiMrgHADIdx].mergeCand; - - if (uiNoResidualPass != 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) // intrainter does not support skip mode + if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP) // intrainter does not support skip mode { - uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM); // for skip, map back to normal merge candidate idx and try RDO if (isTestSkipMerge[uiMergeCand]) { continue; @@ -2129,39 +2569,37 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.mmvdSkip = false; cu.triangle = false; //cu.affine cu.predMode = MODE_INTER; //cu.LICFlag - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); - if (uiNoResidualPass == 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) + if (uiNoResidualPass == 0 && RdModeList[uiMrgHADIdx].isCIIP) { - uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM); cu.mmvdSkip = false; mergeCtx.setMergeInfo(pu, uiMergeCand); - pu.mhIntraFlag = true; - pu.intraDir[0] = RdModeList2[uiMrgHADIdx]; + pu.ciipFlag = true; + pu.regularMergeFlag = false; + pu.intraDir[0] = PLANAR_IDX; CHECK(pu.intraDir[0]<0 || pu.intraDir[0]>(NUM_LUMA_MODE - 1), "out of intra mode"); pu.intraDir[1] = DM_CHROMA_IDX; } - - else if (uiMergeCand >= mergeCtx.numValidMergeCand && uiMergeCand < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM) + else if (RdModeList[uiMrgHADIdx].isMMVD) { cu.mmvdSkip = true; - mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand - mergeCtx.numValidMergeCand); + pu.regularMergeFlag = true; + mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand); } else { cu.mmvdSkip = false; + pu.regularMergeFlag = true; mergeCtx.setMergeInfo(pu, uiMergeCand); } PU::spanMotionInfo( pu, mergeCtx ); @@ -2172,7 +2610,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if( ( isDMVR && MCTSHelper::isRefBlockAtRestrictedTileBoundary( pu ) ) || ( !isDMVR && !( MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) ) ) { // Do not use this mode - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); continue; } } @@ -2194,31 +2632,41 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } } } - if (pu.mhIntraFlag) + if (pu.ciipFlag) { - uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 
2 : 3) : pu.intraDir[0]; + uint32_t bufIdx = 0; PelBuf tmpBuf = tempCS->getPredBuf(pu).Y(); - tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Y()); - if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Y()); + if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { tmpBuf.rspSignal(m_pcReshape->getFwdLUT()); } m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, bufIdx)); + if (pu.chromaSize().width > 2) + { tmpBuf = tempCS->getPredBuf(pu).Cb(); - tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cb()); + tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb()); m_pcIntraSearch->geneWeightedPred(COMPONENT_Cb, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx)); tmpBuf = tempCS->getPredBuf(pu).Cr(); - tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cr()); + tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr()); m_pcIntraSearch->geneWeightedPred(COMPONENT_Cr, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx)); + } + else + { + tmpBuf = tempCS->getPredBuf(pu).Cb(); + tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb()); + tmpBuf = tempCS->getPredBuf(pu).Cr(); + tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr()); + } } else { - if (uiMergeCand >= mergeCtx.numValidMergeCand && uiMergeCand < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM) { + if (RdModeList[uiMrgHADIdx].isMMVD) + { pu.mmvdEncOptMode = 0; m_pcInterSearch->motionCompensation(pu); } - else - if (uiNoResidualPass != 0 && uiMergeCand < mergeCtx.numValidMergeCand && RdModeList[uiMrgHADIdx] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) + else if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP) { tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand]); } @@ -2234,7 +2682,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& m_pcInterSearch->motionCompensation( pu ); pu.mvRefine = false; } - if (!cu.mmvdSkip && !pu.mhIntraFlag 
&& uiNoResidualPass != 0) + if (!cu.mmvdSkip && !pu.ciipFlag && uiNoResidualPass != 0) { CHECK(uiMergeCand >= mergeCtx.numValidMergeCand, "out of normal merge"); isTestSkipMerge[uiMergeCand] = true; @@ -2242,11 +2690,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL ); - if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.mhIntraFlag) + if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.ciipFlag) { bestIsSkip = !bestCS->cus.empty() && bestCS->getCU( partitioner.chType )->rootCbf == 0; } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); }// end loop uiMrgHADIdx if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() ) @@ -2291,9 +2739,12 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru const Slice &slice = *tempCS->slice; const SPS &sps = *tempCS->sps; + if (slice.getPicHeader()->getMaxNumTriangleCand() < 2) + return; + CHECK( slice.getSliceType() != B_SLICE, "Triangle mode is only applied to B-slices" ); - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); bool trianglecandHasNoResidual[TRIANGLE_MAX_NUM_CANDS]; for( int mergeCand = 0; mergeCand < TRIANGLE_MAX_NUM_CANDS; mergeCand++ ) @@ -2301,31 +2752,23 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru trianglecandHasNoResidual[mergeCand] = false; } - bool bestIsSkip; - CodingUnit* cuTemp = bestCS->getCU(partitioner.chType); - if (cuTemp) - bestIsSkip = m_pcEncCfg->getUseFastDecisionForMerge() ? 
bestCS->getCU(partitioner.chType)->rootCbf == 0 : false; - else - bestIsSkip = false; + bool bestIsSkip = false; uint8_t numTriangleCandidate = TRIANGLE_MAX_NUM_CANDS; uint8_t triangleNumMrgSATDCand = TRIANGLE_MAX_NUM_SATD_CANDS; PelUnitBuf triangleBuffer[TRIANGLE_MAX_NUM_UNI_CANDS]; PelUnitBuf triangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; static_vector<uint8_t, TRIANGLE_MAX_NUM_CANDS> triangleRdModeList; static_vector<double, TRIANGLE_MAX_NUM_CANDS> tianglecandCostList; + uint8_t numTriangleCandComb = slice.getPicHeader()->getMaxNumTriangleCand() * (slice.getPicHeader()->getMaxNumTriangleCand() - 1) * 2; - if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) ) - { - bestIsSkip |= blkCache->isSkip( tempCS->area ); - } DistParam distParam; - const bool useHadamard = !encTestMode.lossless; + const bool useHadamard = !tempCS->slice->getDisableSATDForRD(); m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHadamard ); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) ); - const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(encTestMode.lossless); + const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ); MergeCtx triangleMrgCtx; { @@ -2333,20 +2776,19 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru cu.cs = tempCS; cu.predMode = MODE_INTER; cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.triangle = true; cu.mmvdSkip = false; - cu.GBiIdx = GBI_DEFAULT; + cu.BcwIdx = BCW_DEFAULT; PredictionUnit pu( tempCS->area ); pu.cu = &cu; pu.cs = tempCS; - + pu.regularMergeFlag = false; PU::getTriangleMergeCandidates( pu, triangleMrgCtx ); - for( uint8_t mergeCand = 0; mergeCand < 
TRIANGLE_MAX_NUM_UNI_CANDS; mergeCand++ ) + const uint8_t maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand(); + for (uint8_t mergeCand = 0; mergeCand < maxNumTriangleCand; mergeCand++) { triangleBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea); triangleMrgCtx.setMergeInfo( pu, mergeCand ); @@ -2355,43 +2797,31 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) ) ) { // Do not use this mode - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); return; } m_pcInterSearch->motionCompensation( pu, triangleBuffer[mergeCand] ); } } - bool tempBufSet = bestIsSkip ? false : true; - triangleNumMrgSATDCand = bestIsSkip ? TRIANGLE_MAX_NUM_CANDS : TRIANGLE_MAX_NUM_SATD_CANDS; - if( bestIsSkip ) - { - for( uint8_t i = 0; i < TRIANGLE_MAX_NUM_CANDS; i++ ) - { - triangleRdModeList.push_back(i); - } - } - else + triangleNumMrgSATDCand = min(triangleNumMrgSATDCand, numTriangleCandComb); { CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.triangle = true; cu.mmvdSkip = false; - cu.GBiIdx = GBI_DEFAULT; + cu.BcwIdx = BCW_DEFAULT; PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); - if( abs(g_aucLog2[cu.lwidth()] - g_aucLog2[cu.lheight()]) >= 2 ) + if( abs(floorLog2(cu.lwidth()) - floorLog2(cu.lheight())) >= 2 ) { numTriangleCandidate = 30; } @@ -2400,6 +2830,8 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru numTriangleCandidate = TRIANGLE_MAX_NUM_CANDS; } + numTriangleCandidate = min(numTriangleCandidate, numTriangleCandComb); + for( uint8_t mergeCand = 0; mergeCand < numTriangleCandidate; mergeCand++ ) { bool splitDir = m_triangleModeTest[mergeCand].m_splitDir; @@ -2410,6 +2842,7 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru pu.triangleMergeIdx0 = candIdx0; pu.triangleMergeIdx1 = candIdx1; pu.mergeFlag = true; + pu.regularMergeFlag = false; triangleWeightedBuffer[mergeCand] = m_acTriangleWeightedBuffer[mergeCand].getBuf( localUnitArea ); triangleBuffer[candIdx0] = m_acMergeBuffer[candIdx0].getBuf( localUnitArea ); triangleBuffer[candIdx1] = m_acMergeBuffer[candIdx1].getBuf( localUnitArea ); @@ -2423,9 +2856,7 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; - static_vector<int, TRIANGLE_MAX_NUM_CANDS> * nullList = nullptr; updateCandList( mergeCand, cost, triangleRdModeList, tianglecandCostList - , *nullList, -1 , triangleNumMrgSATDCand ); } @@ -2451,25 +2882,20 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru pu.triangleMergeIdx0 = candIdx0; pu.triangleMergeIdx1 = candIdx1; pu.mergeFlag = true; - + pu.regularMergeFlag = false; m_pcInterSearch->weightedTriangleBlk( pu, splitDir, CHANNEL_TYPE_CHROMA, triangleWeightedBuffer[mergeCand], triangleBuffer[candIdx0], 
triangleBuffer[candIdx1] ); } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); } + + triangleNumMrgSATDCand = min(triangleNumMrgSATDCand, (uint8_t)triangleRdModeList.size()); + m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; { uint8_t iteration; - uint8_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0; - if (encTestMode.lossless) - { - iteration = 1; - iterationBegin = 0; - } - else - { - iteration = 2; - } + uint8_t iterationBegin = 0; + iteration = 2; for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass) { for( uint8_t mrgHADIdx = 0; mrgHADIdx < triangleNumMrgSATDCand; mrgHADIdx++ ) @@ -2490,51 +2916,37 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru partitioner.setCUData(cu); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.triangle = true; cu.mmvdSkip = false; - cu.GBiIdx = GBI_DEFAULT; + cu.BcwIdx = BCW_DEFAULT; PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); pu.triangleSplitDir = splitDir; pu.triangleMergeIdx0 = candIdx0; pu.triangleMergeIdx1 = candIdx1; pu.mergeFlag = true; - + pu.regularMergeFlag = false; PU::spanTriangleMotionInfo(pu, triangleMrgCtx, splitDir, candIdx0, candIdx1 ); if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) ) { // Do not use this mode - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); return; } - if( tempBufSet ) - { - tempCS->getPredBuf().copyFrom( triangleWeightedBuffer[mergeCand] ); - } - else - { - triangleBuffer[candIdx0] = m_acMergeBuffer[candIdx0].getBuf( localUnitArea ); - triangleBuffer[candIdx1] = m_acMergeBuffer[candIdx1].getBuf( localUnitArea ); - PelUnitBuf predBuf = tempCS->getPredBuf(); - m_pcInterSearch->weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, triangleBuffer[candIdx0], triangleBuffer[candIdx1] ); - } - + tempCS->getPredBuf().copyFrom( triangleWeightedBuffer[mergeCand] ); xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, ( noResidualPass == 0 ? 
&trianglecandHasNoResidual[mergeCand] : NULL ) ); if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) { bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0; } - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); }// end loop mrgHADIdx } } @@ -2560,7 +2972,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct CHECK( slice.getSliceType() == I_SLICE, "Affine Merge modes not available for I-slices" ); - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); AffineMergeCtx affineMergeCtx; const SPS &sps = *tempCS->sps; @@ -2579,15 +2991,13 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct cu.cs = tempCS; cu.predMode = MODE_INTER; cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.mmvdSkip = false; PredictionUnit pu( tempCS->area ); pu.cu = &cu; pu.cs = tempCS; - + pu.regularMergeFlag = false; PU::getAffineMergeCand( pu, affineMergeCtx ); if ( affineMergeCtx.numValidMergeCand <= 0 ) @@ -2630,26 +3040,23 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct { RdModeList.clear(); mrgTempBufSet = true; - const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless ); + const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ); CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.affine = true; cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); DistParam distParam; - const bool bUseHadamard = !encTestMode.lossless; + const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD(); m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard ); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) ); @@ -2661,9 +3068,10 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct // set merge information pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand]; pu.mergeFlag = true; + pu.regularMergeFlag = false; pu.mergeIdx = uiMergeCand; cu.affineType = affineMergeCtx.affineType[uiMergeCand]; - cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand]; + cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand]; pu.mergeType = affineMergeCtx.mergeType[uiMergeCand]; if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP ) @@ -2682,18 +3090,16 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct distParam.cur = acMergeBuffer[uiMergeCand].Y(); - m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] ); + m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false ); Distortion uiSad = distParam.distFunc( distParam ); uint32_t uiBitsCand = uiMergeCand + 1; - if ( uiMergeCand == tempCS->slice->getMaxNumAffineMergeCand() - 1 ) + if ( uiMergeCand == tempCS->picHeader->getMaxNumAffineMergeCand() - 1 ) { uiBitsCand--; } double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; - static_vector<int, AFFINE_MRG_MAX_NUM_CANDS> emptyList; updateCandList( uiMergeCand, cost, RdModeList, candCostList - , emptyList, -1 , uiNumMrgSATDCand ); CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" ); 
@@ -2709,7 +3115,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct } } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); } else { @@ -2718,16 +3124,8 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct } uint32_t iteration; - uint32_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0; - if (encTestMode.lossless) - { - iteration = 1; - iterationBegin = 0; - } - else - { - iteration = 2; - } + uint32_t iterationBegin = 0; + iteration = 2; for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass) { for ( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ ) @@ -2745,14 +3143,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.affine = true; cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType ); @@ -2761,7 +3156,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct pu.mergeIdx = uiMergeCand; pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand]; cu.affineType = affineMergeCtx.affineType[uiMergeCand]; - cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand]; + cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand]; pu.mergeType = affineMergeCtx.mergeType[uiMergeCand]; if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP ) @@ -2781,12 +3176,13 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) ) { // Do not use this mode - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); return; } if ( mrgTempBufSet ) { - tempCS->getPredBuf().copyFrom( acMergeBuffer[uiMergeCand] ); + tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false); // Copy Luma Only + m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true); } else { @@ -2799,7 +3195,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct { bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0; } - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); }// end loop uiMrgHADIdx if ( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() ) @@ -2842,15 +3238,14 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct // ibc merge/skip mode check void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) { - assert(tempCS->chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived - - if 
(tempCS->area.lwidth() > IBC_MAX_CAND_SIZE || tempCS->area.lheight() > IBC_MAX_CAND_SIZE) // currently only check 32x32 and below block for ibc merge/skip + assert(partitioner.chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived + if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128) // disable IBC mode larger than 64x64 { return; } const SPS &sps = *tempCS->sps; - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); MergeCtx mergeCtx; @@ -2866,17 +3261,14 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct cu.cs = tempCS; cu.predMode = MODE_IBC; cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); PredictionUnit pu(tempCS->area); pu.cu = &cu; pu.cs = tempCS; cu.mmvdSkip = false; pu.mmvdMergeFlag = false; + pu.regularMergeFlag = false; cu.triangle = false; - pu.shareParentPos = tempCS->sharedBndPos; - pu.shareParentSize = tempCS->sharedBndSize; PU::getIBCMergeCandidates(pu, mergeCtx); } @@ -2898,30 +3290,28 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct static_vector<double, MRG_MAX_NUM_CANDS> candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE); // 1. 
Pass: get SATD-cost for selected candidates and reduce their count { - const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(encTestMode.lossless); + const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ); CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType); partitioner.setCUData(cu); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.predMode = MODE_IBC; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.mmvdSkip = false; cu.triangle = false; DistParam distParam; - const bool bUseHadamard = !encTestMode.lossless; + const bool bUseHadamard = !cu.slice->getDisableSATDForRD(); PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); //tempCS->addPU(cu); pu.mmvdMergeFlag = false; + pu.regularMergeFlag = false; Picture* refPic = pu.cu->slice->getPic(); const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]); const Pel* piRefSrch = refBuf.buf; - if (tempCS->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (tempCS->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() ) { const CompArea &area = cu.blocks[COMPONENT_Y]; CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); @@ -2942,13 +3332,13 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct const int cuPelY = pu.Y().y; int roiWidth = pu.lwidth(); int roiHeight = pu.lheight(); - const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); + const int picWidth = 
pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); + const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); int xPred = pu.bv.getHor(); int yPred = pu.bv.getVer(); - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, xPred, yPred, lcuWidth)) // not valid bv derived + if (!m_pcInterSearch->searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived { numValidBv--; continue; @@ -2959,15 +3349,13 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct Distortion sad = distParam.distFunc(distParam); unsigned int bitsCand = mergeCand + 1; - if (mergeCand == tempCS->slice->getMaxNumMergeCand() - 1) + if (mergeCand == tempCS->picHeader->getMaxNumMergeCand() - 1) { bitsCand--; } double cost = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass; - static_vector<int, MRG_MAX_NUM_CANDS> * nullList = nullptr; updateCandList(mergeCand, cost, RdModeList, candCostList - , *nullList, -1 , numMrgSATDCand); } @@ -2990,16 +3378,16 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct tempCS->fracBits = 0; tempCS->cost = MAX_DOUBLE; tempCS->costDbOffset = 0; - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); return; } - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); } //} - const unsigned int iteration = encTestMode.lossless ? 1 : 2; + const unsigned int iteration = 2; m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; // 2. 
Pass: check candidates using full RD test for (unsigned int numResidualPass = 0; numResidualPass < iteration; numResidualPass++) @@ -3018,13 +3406,10 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct partitioner.setCUData(cu); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.predMode = MODE_IBC; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.sbtInfo = 0; @@ -3033,18 +3418,24 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block cu.mmvdSkip = false; pu.mmvdMergeFlag = false; + pu.regularMergeFlag = false; cu.triangle = false; mergeCtx.setMergeInfo(pu, mergeCand); PU::spanMotionInfo(pu, mergeCtx); assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_IBC); // should be IBC candidate at this round - const bool chroma = !(CS::isDualITree(*tempCS)); + const bool chroma = !pu.cu->isSepTree(); // MC m_pcInterSearch->motionCompensation(pu,REF_PIC_LIST_0, true, chroma); m_CABACEstimator->getCtx() = m_CurrCtx->start; m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma); + if (tempCS->slice->getSPS()->getUseColorTrans()) + { + bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost; + bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected; + } xEncodeDontSplit(*tempCS, partitioner); #if ENABLE_QPA_SUB_CTU @@ -3061,7 +3452,7 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda()); xCheckBestMode(tempCS, bestCS, partitioner, encTestMode); - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + 
tempCS->initStructData(encTestMode.qp); } if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip) @@ -3083,12 +3474,12 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode) { - if (tempCS->area.lwidth() > IBC_MAX_CAND_SIZE || tempCS->area.lheight() > IBC_MAX_CAND_SIZE) // currently only check 32x32 and below block for ibc merge/skip + if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128) // disable IBC mode larger than 64x64 { return; } - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; @@ -3096,13 +3487,10 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best partitioner.setCUData(cu); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos()); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.predMode = MODE_IBC; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 
0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; cu.imv = 0; cu.sbtInfo = 0; @@ -3114,27 +3502,30 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best PredictionUnit& pu = *cu.firstPU; cu.mmvdSkip = false; pu.mmvdMergeFlag = false; + pu.regularMergeFlag = false; pu.intraDir[0] = DC_IDX; // set intra pred for ibc block pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block pu.interDir = 1; // use list 0 for IBC mode pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; // last idx in the list - - if (partitioner.chType == CHANNEL_TYPE_LUMA) - { bool bValid = m_pcInterSearch->predIBCSearch(cu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap); if (bValid) { PU::spanMotionInfo(pu); - const bool chroma = !(CS::isDualITree(*tempCS)); + const bool chroma = !pu.cu->isSepTree(); // MC m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, true, chroma); { m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, chroma); + if (tempCS->slice->getSPS()->getUseColorTrans()) + { + bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost; + bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected; + } xEncodeDontSplit(*tempCS, partitioner); @@ -3167,116 +3558,49 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best tempCS->cost = MAX_DOUBLE; tempCS->costDbOffset = 0; } - } - // chroma CU ibc comp - else - { - bool success = true; - // chroma tree, reuse luma bv at minimal block level - // enabled search only when each chroma sub-block has a BV from its luma sub-block - assert(tempCS->getIbcLumaCoverage(pu.Cb()) == IBC_LUMA_COVERAGE_FULL); - // check if each BV for the chroma sub-block is valid - //static const UInt unitArea = MIN_PU_SIZE * MIN_PU_SIZE; - const CompArea lumaArea = CompArea(COMPONENT_Y, pu.chromaFormat, pu.Cb().lumaPos(), recalcSize(pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, pu.Cb().size())); - 
PredictionUnit subPu; - subPu.cs = pu.cs; - subPu.cu = pu.cu; - const ComponentID compID = COMPONENT_Cb; // use Cb to represent both Cb and CR, as their structures are the same - int shiftHor = ::getComponentScaleX(compID, pu.chromaFormat); - int shiftVer = ::getComponentScaleY(compID, pu.chromaFormat); - //const ChromaFormat chFmt = pu.chromaFormat; - - for (int y = lumaArea.y; y < lumaArea.y + lumaArea.height; y += MIN_PU_SIZE) - { - for (int x = lumaArea.x; x < lumaArea.x + lumaArea.width; x += MIN_PU_SIZE) - { - const MotionInfo &curMi = pu.cs->picture->cs->getMotionInfo(Position{ x, y }); - - subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, MIN_PU_SIZE, MIN_PU_SIZE))); - Position offsetRef = subPu.blocks[compID].pos().offset((curMi.bv.getHor() >> shiftHor), (curMi.bv.getVer() >> shiftVer)); - Position refEndPos(offsetRef.x + subPu.blocks[compID].size().width - 1, offsetRef.y + subPu.blocks[compID].size().height - 1 ); - - if (!subPu.cs->isDecomp(refEndPos, toChannelType(compID)) || !subPu.cs->isDecomp(offsetRef, toChannelType(compID))) // ref block is not yet available for this chroma sub-block - { - success = false; - break; - } - } - if (!success) - break; - } - //////////////////////////////////////////////////////////////////////////// - - if (success) - { - //pu.mergeType = MRG_TYPE_IBC; - m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, false, true); // luma=0, chroma=1 - m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, false, true); - - xEncodeDontSplit(*tempCS, partitioner); - - xCheckDQP(*tempCS, partitioner); - tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt(); - if ( m_bestModeUpdated ) - { - xCalDebCost( *tempCS, partitioner ); - } - - DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda()); - - xCheckBestMode(tempCS, bestCS, partitioner, encTestMode); - } - else - { - tempCS->dist = 0; - tempCS->fracBits = 0; - tempCS->cost = MAX_DOUBLE; - tempCS->costDbOffset = 0; - } - } - } +} // check ibc mode in encoder 
RD ////////////////////////////////////////////////////////////////////////////////////////////// void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); m_pcInterSearch->setAffineModeSelected(false); if( tempCS->slice->getCheckLDC() ) { - m_bestGbiCost[0] = m_bestGbiCost[1] = std::numeric_limits<double>::max(); - m_bestGbiIdx[0] = m_bestGbiIdx[1] = -1; + m_bestBcwCost[0] = m_bestBcwCost[1] = std::numeric_limits<double>::max(); + m_bestBcwIdx[0] = m_bestBcwIdx[1] = -1; } m_pcInterSearch->resetBufferedUniMotions(); - int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1); - gbiLoopNum = (tempCS->sps->getUseGBi() ? gbiLoopNum : 1); + int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1); + bcwLoopNum = (tempCS->sps->getUseBcw() ? bcwLoopNum : 1); - if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT ) + if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT ) { - gbiLoopNum = 1; + bcwLoopNum = 1; } double curBestCost = bestCS->cost; - double equGBiCost = MAX_DOUBLE; + double equBcwCost = MAX_DOUBLE; m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; - for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ ) { - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl); if( blkCache ) { bool isBestInter = blkCache->getInter(bestCS->area); - uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area); + uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area); - if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx ) + if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != 
bestBcwIdx ) { continue; } @@ -3284,7 +3608,7 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC } if( !tempCS->slice->getCheckLDC() ) { - if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 ) + if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 ) { continue; } @@ -3294,36 +3618,33 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.mmvdSkip = false; //cu.affine cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs( cu ); - cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx]; - uint8_t gbiIdx = cu.GBiIdx; - bool testGbi = (gbiIdx != GBI_DEFAULT); + cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx]; + uint8_t bcwIdx = cu.BcwIdx; + bool testBcw = (bcwIdx != BCW_DEFAULT); m_pcInterSearch->predInterSearch( cu, partitioner ); - gbiIdx = CU::getValidGbiIdx(cu); - if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni. + bcwIdx = CU::getValidBcwIdx(cu); + if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni. 
{ - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); continue; } - CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )"); + CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )"); bool isEqualUni = false; - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { - if( cu.firstPU->interDir != 3 && testGbi == 0 ) + if( cu.firstPU->interDir != 3 && testBcw == 0 ) { isEqualUni = true; } @@ -3331,33 +3652,33 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 , 0 - , &equGBiCost + , &equBcwCost ); - if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT ) + if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT ) m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag))); - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); double skipTH = MAX_DOUBLE; - skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE); - if( equGBiCost > curBestCost * skipTH ) + skipTH = (m_pcEncCfg->getUseBcwFast() ? 
1.05 : MAX_DOUBLE); + if( equBcwCost > curBestCost * skipTH ) { break; } - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 ) { break; } } - if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() ) + if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() ) { break; } - } // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + } // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ ) if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE ) { xCalDebCost( *bestCS, partitioner ); @@ -3367,13 +3688,13 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC - -bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) +bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, double &bestIntPelCost) { int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ); m_pcInterSearch->setAffineModeSelected(false); - // Only int-Pel, 4-Pel and fast 4-Pel allowed - CHECK( iIMV != 1 && iIMV != 2 && iIMV != 3, "Unsupported IMV Mode" ); + // Only Half-Pel, int-Pel, 4-Pel and fast 4-Pel allowed + CHECK(iIMV < 1 || iIMV > 4, "Unsupported IMV Mode"); + const bool testAltHpelFilter = iIMV == 4; // Fast 4-Pel Mode m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false; @@ -3381,33 +3702,33 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be EncTestMode encTestModeBase = encTestMode; // copy for clearing non-IMV options encTestModeBase.opts = EncTestModeOpts( encTestModeBase.opts & ETO_IMV ); // clear non-IMV options (is that intended?) 
- tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); m_pcInterSearch->resetBufferedUniMotions(); - int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1); - gbiLoopNum = (tempCS->slice->getSPS()->getUseGBi() ? gbiLoopNum : 1); + int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1); + bcwLoopNum = (tempCS->slice->getSPS()->getUseBcw() ? bcwLoopNum : 1); - if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT ) + if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT ) { - gbiLoopNum = 1; + bcwLoopNum = 1; } bool validMode = false; double curBestCost = bestCS->cost; - double equGBiCost = MAX_DOUBLE; + double equBcwCost = MAX_DOUBLE; - for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ ) { - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl); if( blkCache ) { bool isBestInter = blkCache->getInter(bestCS->area); - uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area); + uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area); - if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx ) + if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx ) { continue; } @@ -3416,15 +3737,15 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be if( !tempCS->slice->getCheckLDC() ) { - if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 ) + if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 ) { continue; } } - if( m_pcEncCfg->getUseGBiFast() && tempCS->slice->getCheckLDC() && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT - && (m_bestGbiIdx[0] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[0]) - && (m_bestGbiIdx[1] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[1])) + 
if( m_pcEncCfg->getUseBcwFast() && tempCS->slice->getCheckLDC() && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT + && (m_bestBcwIdx[0] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[0]) + && (m_bestBcwIdx[1] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[1])) { continue; } @@ -3433,28 +3754,32 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be partitioner.setCUData( cu ); cu.slice = tempCS->slice; -#if HEVC_TILES_WPP - cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() ); -#endif + cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() ); cu.skip = false; cu.mmvdSkip = false; //cu.affine cu.predMode = MODE_INTER; - cu.transQuantBypass = encTestMode.lossless; - cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1; + cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1; cu.qp = encTestMode.qp; CU::addPUs( cu ); - cu.imv = iIMV > 1 ? 2 : 1; + if (testAltHpelFilter) + { + cu.imv = IMV_HPEL; + } + else + { + cu.imv = iIMV == 1 ? 
IMV_FPEL : IMV_4PEL; + } - bool testGbi; - uint8_t gbiIdx; - bool affineAmvrEanbledFlag = cu.slice->getSPS()->getAffineAmvrEnabledFlag(); + bool testBcw; + uint8_t bcwIdx; + bool affineAmvrEanbledFlag = !testAltHpelFilter && cu.slice->getSPS()->getAffineAmvrEnabledFlag(); - cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx]; - gbiIdx = cu.GBiIdx; - testGbi = (gbiIdx != GBI_DEFAULT); + cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx]; + bcwIdx = cu.BcwIdx; + testBcw = (bcwIdx != BCW_DEFAULT); cu.firstPU->interDir = 10; @@ -3462,7 +3787,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be if ( cu.firstPU->interDir <= 3 ) { - gbiIdx = CU::getValidGbiIdx(cu); + bcwIdx = CU::getValidBcwIdx(cu); } else { @@ -3472,20 +3797,20 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be if( m_pcEncCfg->getMCTSEncConstraint() && ( ( cu.firstPU->refIdx[L0] < 0 && cu.firstPU->refIdx[L1] < 0 ) || ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) ) ) { // Do not use this mode - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); continue; } - if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni. + if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni. 
{ - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + tempCS->initStructData(encTestMode.qp); continue; } - CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )"); + CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )"); bool isEqualUni = false; - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { - if( cu.firstPU->interDir != 3 && testGbi == 0 ) + if( cu.firstPU->interDir != 3 && testBcw == 0 ) { isEqualUni = true; } @@ -3501,7 +3826,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } if ( affineAmvrEanbledFlag ) { - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); continue; } else @@ -3512,31 +3837,35 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 , 0 - , &equGBiCost + , &equBcwCost ); - tempCS->initStructData(encTestMode.qp, encTestMode.lossless); + if( cu.imv == IMV_FPEL && tempCS->cost < bestIntPelCost ) + { + bestIntPelCost = tempCS->cost; + } + tempCS->initStructData(encTestMode.qp); double skipTH = MAX_DOUBLE; - skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE); - if( equGBiCost > curBestCost * skipTH ) + skipTH = (m_pcEncCfg->getUseBcwFast() ? 
1.05 : MAX_DOUBLE); + if( equBcwCost > curBestCost * skipTH ) { break; } - if( m_pcEncCfg->getUseGBiFast() ) + if( m_pcEncCfg->getUseBcwFast() ) { if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 ) { break; } } - if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() ) + if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() ) { break; } validMode = true; - } // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ ) + } // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ ) if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE ) { @@ -3562,16 +3891,16 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal const ChromaFormat format = cs.area.chromaFormat; CodingUnit* cu = cs.getCU(partitioner.chType); const Position lumaPos = cu->Y().valid() ? cu->Y().pos() : recalcPosition( format, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].pos() ); - bool topEdgeAvai = lumaPos.y > 0 && ( ( lumaPos.y % 8 ) == 0 ); - bool leftEdgeAvai = lumaPos.x > 0 && ( ( lumaPos.x % 8 ) == 0 ); + bool topEdgeAvai = lumaPos.y > 0 && ((lumaPos.y % 4) == 0); + bool leftEdgeAvai = lumaPos.x > 0 && ((lumaPos.x % 4) == 0); bool anyEdgeAvai = topEdgeAvai || leftEdgeAvai; cs.costDbOffset = 0; if ( calDist ) { const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture ); - ComponentID compStr = ( CS::isDualITree( cs ) && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y; - ComponentID compEnd = ( CS::isDualITree( cs ) && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr; + ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y; + ComponentID compEnd = ( cu->isSepTree() && isLuma( partitioner.chType ) ) ? 
COMPONENT_Y : COMPONENT_Cr; Distortion finalDistortion = 0; for ( int comp = compStr; comp <= compEnd; comp++ ) { @@ -3586,8 +3915,8 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal if ( anyEdgeAvai && m_pcEncCfg->getUseEncDbOpt() ) { - ComponentID compStr = ( CS::isDualITree( cs ) && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y; - ComponentID compEnd = ( CS::isDualITree( cs ) && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr; + ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y; + ComponentID compEnd = ( cu->isSepTree() && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr; const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture ); @@ -3607,7 +3936,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal //Copy current CU's reco to Deblock Pic Buffer const CompArea& curCompArea = currCsArea.block( compId ); picDbBuf.getBuf( curCompArea ).copyFrom( cs.getRecoBuf( curCompArea ) ); - if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) ) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId)) { picDbBuf.getBuf( curCompArea ).rspSignal( m_pcReshape->getInvLUT() ); } @@ -3617,7 +3946,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal { const CompArea& compArea = areaLeft.block(compId); picDbBuf.getBuf( compArea ).copyFrom( cs.picture->getRecoBuf( compArea ) ); - if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) ) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId)) { picDbBuf.getBuf( compArea ).rspSignal( m_pcReshape->getInvLUT() ); } @@ -3627,7 
+3956,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal { const CompArea& compArea = areaTop.block( compId ); picDbBuf.getBuf( compArea ).copyFrom( cs.picture->getRecoBuf( compArea ) ); - if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) ) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId)) { picDbBuf.getBuf( compArea ).rspSignal( m_pcReshape->getInvLUT() ); } @@ -3694,9 +4023,10 @@ Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf rec { Distortion dist = 0; #if WCG_EXT + m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc()); CPelBuf orgLuma = cs.picture->getOrigBuf( cs.area.blocks[COMPONENT_Y] ); if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( - m_pcEncCfg->getReshaper() && ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ) ) ) + m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) { if ( compID == COMPONENT_Y && !afterDb && !m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) { @@ -3711,7 +4041,7 @@ Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf rec dist += m_pcRdCost->getDistPart( org, reco, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma ); } } - else if ( m_pcEncCfg->getReshaper() && cs.slice->getReshapeInfo().getUseSliceReshaper() && cs.slice->isIntra() ) //intra slice + else if (m_pcEncCfg->getLmcs() && cs.picHeader->getLmcsEnabledFlag() && cs.slice->isIntra()) //intra slice { if ( compID == COMPONENT_Y && afterDb ) { @@ -3740,13 +4070,9 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS , const EncTestMode& encTestMode , int residualPass , bool* bestHasNonResi - , double* equGBiCost + , double* equBcwCost ) { - if( residualPass == 1 && encTestMode.lossless ) - { - return; - } 
CodingUnit* cu = tempCS->getCU( partitioner.chType ); double bestCostInternal = MAX_DOUBLE; @@ -3757,18 +4083,61 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS bool swapped = false; // avoid unwanted data copy bool reloadCU = false; - // Not allow very big |MVd| to avoid CABAC crash caused by too large MVd. Normally no impact on coding performance. - const int maxMvd = 1 << 15; const PredictionUnit& pu = *cu->firstPU; - if (!cu->affine) + + // clang-format off + const int affineShiftTab[3] = + { + MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, + MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, + MV_PRECISION_INTERNAL - MV_PRECISION_INT + }; + + const int normalShiftTab[NUM_IMV_MODES] = + { + MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, + MV_PRECISION_INTERNAL - MV_PRECISION_INT, + MV_PRECISION_INTERNAL - MV_PRECISION_4PEL, + MV_PRECISION_INTERNAL - MV_PRECISION_HALF, + }; + // clang-format on + + int mvShift; + + for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) + { + if (pu.refIdx[refList] >= 0) + { + if (!cu->affine) + { + mvShift = normalShiftTab[cu->imv]; + Mv signaledmvd(pu.mvd[refList].getHor() >> mvShift, pu.mvd[refList].getVer() >> mvShift); + if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX))) + return; + } + else + { + for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--) + { + mvShift = affineShiftTab[cu->imv]; + Mv signaledmvd(pu.mvdAffi[refList][ctrlP].getHor() >> mvShift, pu.mvdAffi[refList][ctrlP].getVer() >> mvShift); + if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX))) + return; + } + } + } + } + // avoid MV exceeding 18-bit dynamic range + const int maxMv = 1 << 17; + if (!cu->affine && !pu.mergeFlag) { - if ((pu.refIdx[0] >= 0 && (pu.mvd[0].getAbsHor() >= maxMvd || pu.mvd[0].getAbsVer() 
>= maxMvd)) - || (pu.refIdx[1] >= 0 && (pu.mvd[1].getAbsHor() >= maxMvd || pu.mvd[1].getAbsVer() >= maxMvd))) + if ( (pu.refIdx[0] >= 0 && (pu.mv[0].getAbsHor() >= maxMv || pu.mv[0].getAbsVer() >= maxMv)) + || (pu.refIdx[1] >= 0 && (pu.mv[1].getAbsHor() >= maxMv || pu.mv[1].getAbsVer() >= maxMv))) { return; } } - else + if (cu->affine && !pu.mergeFlag) { for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) { @@ -3776,7 +4145,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS { for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--) { - if (pu.mvdAffi[refList][ctrlP].getAbsHor() >= maxMvd || pu.mvdAffi[refList][ctrlP].getAbsVer() >= maxMvd) + if (pu.mvAffi[refList][ctrlP].getAbsHor() >= maxMv || pu.mvAffi[refList][ctrlP].getAbsVer() >= maxMv) { return; } @@ -3786,6 +4155,11 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS } const bool mtsAllowed = tempCS->sps->getUseInterMTS() && CU::isInter( *cu ) && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE; uint8_t sbtAllowed = cu->checkAllowedSbt(); + //SBT resolution-dependent fast algorithm: not try size-64 SBT in RDO for low-resolution sequences (now resolution below HD) + if( tempCS->pps->getPicWidthInLumaSamples() < (uint32_t)m_pcEncCfg->getSBTFast64WidthTh() ) + { + sbtAllowed = ((cu->lwidth() > 32 || cu->lheight() > 32)) ? 
0 : sbtAllowed; + } uint8_t numRDOTried = 0; Distortion sbtOffDist = 0; bool sbtOffRootCbf = 0; @@ -3837,7 +4211,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS } else if( false == swapped ) { - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); tempCS->copyStructure( *bestCS, partitioner.chType ); tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); bestCost = bestCS->cost; @@ -3866,6 +4240,11 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) ) { m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); + if (tempCS->slice->getSPS()->getUseColorTrans()) + { + bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost; + bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected; + } numRDOTried += mtsAllowed ? 2 : 1; xEncodeDontSplit( *tempCS, partitioner ); @@ -3875,13 +4254,13 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) ) { bestCostInternal = tempCS->cost; - if (!(tempCS->getPU(partitioner.chType)->mhIntraFlag)) + if (!(tempCS->getPU(partitioner.chType)->ciipFlag)) *bestHasNonResi = !cu->rootCbf; } if (cu->rootCbf == false) { - if (tempCS->getPU(partitioner.chType)->mhIntraFlag) + if (tempCS->getPU(partitioner.chType)->ciipFlag) { tempCS->cost = MAX_DOUBLE; tempCS->costDbOffset = 0; @@ -3892,12 +4271,8 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS sbtOffCost = tempCS->cost; sbtOffDist = tempCS->dist; sbtOffRootCbf = cu->rootCbf; - currBestSbt = CU::getSbtInfo( cu->firstTU->mtsIdx > 1 ? SBT_OFF_MTS : SBT_OFF_DCT, 0 ); - currBestTrs = cu->firstTU->mtsIdx; - if( cu->lwidth() <= MAX_TB_SIZEY && cu->lheight() <= MAX_TB_SIZEY ) - { - CHECK( tempCS->tus.size() != 1, "tu must be only one" ); - } + currBestSbt = CU::getSbtInfo(cu->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP ? 
SBT_OFF_MTS : SBT_OFF_DCT, 0); + currBestTrs = cu->firstTU->mtsIdx[COMPONENT_Y]; #if WCG_EXT DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) ); @@ -3986,7 +4361,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS } else if( false == swapped ) { - tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->initStructData( encTestMode.qp ); tempCS->copyStructure( *bestCS, partitioner.chType ); tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); bestCost = bestCS->cost; @@ -4012,6 +4387,11 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS //try residual coding m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); + if (tempCS->slice->getSPS()->getUseColorTrans()) + { + bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost; + bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected; + } numRDOTried++; xEncodeDontSplit( *tempCS, partitioner ); @@ -4021,14 +4401,14 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) ) { bestCostInternal = tempCS->cost; - if( !( tempCS->getPU( partitioner.chType )->mhIntraFlag ) ) + if( !( tempCS->getPU( partitioner.chType )->ciipFlag ) ) *bestHasNonResi = !cu->rootCbf; } if( tempCS->cost < currBestCost ) { currBestSbt = cu->sbtInfo; - currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx; + currBestTrs = tempCS->tus[cu->sbtInfo ? 
cu->getSbtPos() : 0]->mtsIdx[COMPONENT_Y]; assert( currBestTrs == 0 || currBestTrs == 1 ); currBestCost = tempCS->cost; } @@ -4055,30 +4435,30 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS tempCS->cost = currBestCost; if( ETM_INTER_ME == encTestMode.type ) { - if( equGBiCost != NULL ) + if( equBcwCost != NULL ) { - if( tempCS->cost < ( *equGBiCost ) && cu->GBiIdx == GBI_DEFAULT ) + if( tempCS->cost < ( *equBcwCost ) && cu->BcwIdx == BCW_DEFAULT ) { - ( *equGBiCost ) = tempCS->cost; + ( *equBcwCost ) = tempCS->cost; } } else { - CHECK( equGBiCost == NULL, "equGBiCost == NULL" ); + CHECK( equBcwCost == NULL, "equBcwCost == NULL" ); } - if( tempCS->slice->getCheckLDC() && !cu->imv && cu->GBiIdx != GBI_DEFAULT && tempCS->cost < m_bestGbiCost[1] ) + if( tempCS->slice->getCheckLDC() && !cu->imv && cu->BcwIdx != BCW_DEFAULT && tempCS->cost < m_bestBcwCost[1] ) { - if( tempCS->cost < m_bestGbiCost[0] ) + if( tempCS->cost < m_bestBcwCost[0] ) { - m_bestGbiCost[1] = m_bestGbiCost[0]; - m_bestGbiCost[0] = tempCS->cost; - m_bestGbiIdx[1] = m_bestGbiIdx[0]; - m_bestGbiIdx[0] = cu->GBiIdx; + m_bestBcwCost[1] = m_bestBcwCost[0]; + m_bestBcwCost[0] = tempCS->cost; + m_bestBcwIdx[1] = m_bestBcwIdx[0]; + m_bestBcwIdx[0] = cu->BcwIdx; } else { - m_bestGbiCost[1] = tempCS->cost; - m_bestGbiIdx[1] = cu->GBiIdx; + m_bestBcwCost[1] = tempCS->cost; + m_bestBcwIdx[1] = cu->BcwIdx; } } } @@ -4090,6 +4470,8 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner ) m_CABACEstimator->resetBits(); m_CABACEstimator->split_cu_mode( CU_DONT_SPLIT, cs, partitioner ); + if( partitioner.treeType == TREE_C ) + CHECK( m_CABACEstimator->getEstFracBits() != 0, "must be 0 bit" ); cs.fracBits += m_CABACEstimator->getEstFracBits(); // split bits cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); @@ -4099,6 +4481,7 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner ) #if REUSE_CU_RESULTS void EncCu::xReuseCachedResult( CodingStructure 
*&tempCS, CodingStructure *&bestCS, Partitioner &partitioner ) { + m_pcRdCost->setChromaFormat(tempCS->sps->getChromaFormatIdc()); BestEncInfoCache* bestEncCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ); CHECK( !bestEncCache, "If this mode is chosen, mode controller has to implement the mode caching capabilities" ); EncTestMode cachedMode; @@ -4106,11 +4489,11 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best if( bestEncCache->setCsFrom( *tempCS, cachedMode, partitioner ) ) { CodingUnit& cu = *tempCS->cus.front(); - cu.shareParentPos = tempCS->sharedBndPos; - cu.shareParentSize = tempCS->sharedBndSize; partitioner.setCUData( cu ); - if( CU::isIntra( cu ) ) + if( CU::isIntra( cu ) + || CU::isPLT(cu) + ) { xReconIntraQT( cu ); } @@ -4136,7 +4519,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best { const ComponentID compID = ComponentID( comp ); - if( CS::isDualITree( *tempCS ) && toChannelType( compID ) != partitioner.chType ) + if( partitioner.isSepTree( *tempCS ) && toChannelType( compID ) != partitioner.chType ) { continue; } @@ -4146,7 +4529,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best #if WCG_EXT if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( - m_pcEncCfg->getReshaper() && (tempCS->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()))) + m_pcEncCfg->getLmcs() && (tempCS->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) { const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]); if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 270137a9c39d678262b51686370beced278c8648..3a9fbf7d42d1612d121218d633f4e18bc8fe1af7 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no 
such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -74,6 +74,7 @@ struct TriangleMotionInfo uint8_t m_candIdx1; TriangleMotionInfo ( uint8_t splitDir, uint8_t candIdx0, uint8_t candIdx1 ): m_splitDir(splitDir), m_candIdx0(candIdx0), m_candIdx1(candIdx1) { } + TriangleMotionInfo() { m_splitDir = m_candIdx0 = m_candIdx1 = 0; } }; class EncCu : DecCu @@ -90,7 +91,7 @@ private: CtxPair* m_CurrCtx; CtxCache* m_CtxCache; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM int m_dataId; #endif @@ -101,6 +102,8 @@ private: CodingStructure ***m_pTempCS; CodingStructure ***m_pBestCS; + CodingStructure ***m_pTempCS2; + CodingStructure ***m_pBestCS2; // Access channel EncCfg* m_pcEncCfg; IntraSearch* m_pcIntraSearch; @@ -114,35 +117,35 @@ private: RateCtrl* m_pcRateCtrl; IbcHashMap m_ibcHashMap; EncModeCtrl *m_modeCtrl; - int m_shareState; - uint32_t m_shareBndPosX; - uint32_t m_shareBndPosY; - SizeType m_shareBndSizeW; - SizeType m_shareBndSizeH; PelStorage m_acMergeBuffer[MMVD_MRG_MAX_RD_BUF_NUM]; PelStorage m_acRealMergeBuffer[MRG_MAX_NUM_CANDS]; + PelStorage m_acMergeTmpBuffer[MRG_MAX_NUM_CANDS]; PelStorage m_acTriangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; // to store weighted prediction pixles double m_mergeBestSATDCost; MotionInfo m_SubPuMiBuf [( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 )]; int m_ctuIbcSearchRangeX; int m_ctuIbcSearchRangeY; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM EncLib* m_pcEncLib; #endif - int m_bestGbiIdx[2]; - double m_bestGbiCost[2]; - static const TriangleMotionInfo m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS]; - uint8_t m_triangleIdxBins[2][TRIANGLE_MAX_NUM_UNI_CANDS][TRIANGLE_MAX_NUM_UNI_CANDS]; + int m_bestBcwIdx[2]; + double m_bestBcwCost[2]; + TriangleMotionInfo 
m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS]; + uint8_t m_triangleIdxBins[2][TRIANGLE_MAX_NUM_UNI_CANDS][TRIANGLE_MAX_NUM_UNI_CANDS]; #if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU - void updateLambda ( Slice* slice, const int dQP, const bool updateRdCostLambda ); + void updateLambda ( Slice* slice, const int dQP, + #if WCG_EXT && ER_CHROMA_QP_WCG_PPS + const bool useWCGChromaControl, + #endif + const bool updateRdCostLambda ); #endif double m_sbtCostSave[2]; - public: /// copy parameters from encoder class void init ( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int jId = 0 ) ); + void setDecCuReshaperInEncCU(EncReshape* pcReshape, ChromaFormat chromaFormatIDC) { initDecCuReshaper((Reshape*) pcReshape, chromaFormatIDC); } /// create internal buffers void create ( EncCfg* encCfg ); @@ -163,6 +166,7 @@ public: IbcHashMap& getIbcHashMap() { return m_ibcHashMap; } EncCfg* getEncCfg() const { return m_pcEncCfg; } + EncCu(); ~EncCu(); protected: @@ -170,7 +174,7 @@ protected: void xCalDebCost ( CodingStructure &cs, Partitioner &partitioner, bool calDist = false ); Distortion getDistortionDb ( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb ); - void xCompressCU ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm ); + void xCompressCU ( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& pm, double maxCostAllowed = MAX_DOUBLE ); #if ENABLE_SPLIT_PARALLELISM void xCompressCUParallel ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm ); void copyState ( EncCu* other, Partitioner& pm, const UnitArea& currArea, const bool isDist ); @@ -179,10 +183,9 @@ protected: bool xCheckBestMode ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestmode ); - void xCheckModeSplit ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); + void xCheckModeSplit ( CodingStructure *&tempCS, 
CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass ); - void xCheckRDCostIntra ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); - void xCheckIntraPCM ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); + bool xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, bool adaptiveColorTrans); void xCheckDQP ( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx = false); void xFillPCMBuffer ( CodingUnit &cu); @@ -191,7 +194,7 @@ protected: void xCheckRDCostAffineMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); void xCheckRDCostInter ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); - bool xCheckRDCostInterIMV ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); + bool xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, double &bestIntPelCost); void xEncodeDontSplit ( CodingStructure &cs, Partitioner &partitioner); void xCheckRDCostMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); @@ -204,12 +207,12 @@ protected: , const EncTestMode& encTestMode , int residualPass = 0 , bool* bestHasNonResi = NULL - , double* equGBiCost = NULL + , double* equBcwCost = NULL ); #if REUSE_CU_RESULTS void xReuseCachedResult ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner ); #endif - bool xIsGBiSkip(const CodingUnit& cu) + bool xIsBcwSkip(const CodingUnit& cu) { if (cu.slice->getSliceType() != B_SLICE) { @@ -222,6 +225,8 @@ protected: } void xCheckRDCostIBCMode ( CodingStructure *&tempCS, 
CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); void xCheckRDCostIBCModeMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); + + void xCheckPLT ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ); }; //! \} diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index 9618b0b34520bf2cdcf3db64dc1c2e6c33c9c668..0bc3ea911b653695611ded021506232dcd0fb76b 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -80,7 +80,6 @@ int getLSB(int poc, int maxLSB) } } - EncGOP::EncGOP() { m_iLastIDR = 0; @@ -88,6 +87,7 @@ EncGOP::EncGOP() m_iNumPicCoded = 0; //Niko m_bFirst = true; m_iLastRecoveryPicPOC = 0; + m_latestDRAPPOC = MAX_INT; m_lastRasPoc = MAX_INT; m_pcCfg = NULL; @@ -101,7 +101,8 @@ EncGOP::EncGOP() m_numLongTermRefPicSPS = 0; ::memset(m_ltRefPicPocLsbSps, 0, sizeof(m_ltRefPicPocLsbSps)); ::memset(m_ltRefPicUsedByCurrPicFlag, 0, sizeof(m_ltRefPicUsedByCurrPicFlag)); - m_lastBPSEI = 0; + ::memset(m_lastBPSEI, 0, sizeof(m_lastBPSEI)); + m_rapWithLeading = false; m_bufferingPeriodSEIPresentInAU = false; m_associatedIRAPType = NAL_UNIT_CODED_SLICE_IDR_N_LP; m_associatedIRAPPOC = 0; @@ -109,6 +110,22 @@ EncGOP::EncGOP() m_pcDeblockingTempPicYuv = NULL; #endif +#if JVET_O0756_CALCULATE_HDRMETRICS + m_ppcFrameOrg = nullptr; + m_ppcFrameRec = nullptr; + + m_pcConvertFormat = nullptr; + m_pcConvertIQuantize = nullptr; + m_pcColorTransform = nullptr; + m_pcDistortionDeltaE = nullptr; + m_pcTransferFct = nullptr; + + m_pcColorTransformParams = nullptr; + m_pcFrameFormat 
= nullptr; + + m_metricTime = std::chrono::milliseconds(0); +#endif + m_bInitAMaxBT = true; m_bgPOC = -1; m_picBg = NULL; @@ -126,6 +143,28 @@ EncGOP::~EncGOP() // reset potential decoder resources tryDecodePicture( NULL, 0, std::string("") ); } +#if JVET_O0756_CALCULATE_HDRMETRICS + delete [] m_ppcFrameOrg; + delete [] m_ppcFrameRec; + + m_ppcFrameOrg = m_ppcFrameRec = nullptr; + + delete m_pcConvertFormat; + delete m_pcConvertIQuantize; + delete m_pcColorTransform; + delete m_pcDistortionDeltaE; + delete m_pcTransferFct; + delete m_pcColorTransformParams; + delete m_pcFrameFormat; + + m_pcConvertFormat = nullptr; + m_pcConvertIQuantize = nullptr; + m_pcColorTransform = nullptr; + m_pcDistortionDeltaE = nullptr; + m_pcTransferFct = nullptr; + m_pcColorTransformParams = nullptr; + m_pcFrameFormat = nullptr; +#endif } /** Create list to contain pointers to CTU start addresses of slice. @@ -170,15 +209,16 @@ void EncGOP::init ( EncLib* pcEncLib ) m_HLSWriter = pcEncLib->getHLSWriter(); m_pcLoopFilter = pcEncLib->getLoopFilter(); m_pcSAO = pcEncLib->getSAO(); - m_pcALF = pcEncLib->getALF(); + m_pcALF = pcEncLib->getALF(); m_pcRateCtrl = pcEncLib->getRateCtrl(); - m_lastBPSEI = 0; - m_totalCoded = 0; + ::memset(m_lastBPSEI, 0, sizeof(m_lastBPSEI)); + ::memset(m_totalCoded, 0, sizeof(m_totalCoded)); + m_HRD = pcEncLib->getHRD(); m_AUWriterIf = pcEncLib->getAUWriterIf(); #if WCG_EXT - if (m_pcCfg->getReshaper()) + if (m_pcCfg->getLmcs()) { pcEncLib->getRdCost()->setReshapeInfo(m_pcCfg->getReshapeSignalType(), m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA)); pcEncLib->getRdCost()->initLumaLevelToWeightTableReshape(); @@ -190,82 +230,197 @@ void EncGOP::init ( EncLib* pcEncLib ) } pcEncLib->getALF()->getLumaLevelWeightTable() = pcEncLib->getRdCost()->getLumaLevelWeightTable(); int alfWSSD = 0; - if (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ ) + if (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ ) { alfWSSD = 1; } 
pcEncLib->getALF()->setAlfWSSD(alfWSSD); #endif m_pcReshaper = pcEncLib->getReshaper(); + +#if JVET_O0756_CALCULATE_HDRMETRICS + const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics(); + if(calculateHdrMetrics) + { + //allocate frame buffers and initialize class members + int chainNumber = 5; + + m_ppcFrameOrg = new hdrtoolslib::Frame* [chainNumber]; + m_ppcFrameRec = new hdrtoolslib::Frame* [chainNumber]; + + double* whitePointDeltaE = new double[hdrtoolslib::NB_REF_WHITE]; + for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++) + { + whitePointDeltaE[i] = m_pcCfg->getWhitePointDeltaE(i); + } + + double maxSampleValue = m_pcCfg->getMaxSampleValue(); + hdrtoolslib::SampleRange sampleRange = m_pcCfg->getSampleRange(); + hdrtoolslib::ChromaFormat chFmt = hdrtoolslib::ChromaFormat(m_pcCfg->getChromaFormatIdc()); + int bitDepth = m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA); + hdrtoolslib::ColorPrimaries colorPrimaries = m_pcCfg->getColorPrimaries(); + bool enableTFunctionLUT = m_pcCfg->getEnableTFunctionLUT(); + hdrtoolslib::ChromaLocation* chromaLocation = new hdrtoolslib::ChromaLocation[2]; + for (int i=0; i<2; i++) + { + chromaLocation[i] = m_pcCfg->getChromaLocation(i); + } + int chromaUpFilter = m_pcCfg->getChromaUPFilter(); + int cropOffsetLeft = m_pcCfg->getCropOffsetLeft(); + int cropOffsetTop = m_pcCfg->getCropOffsetTop(); + int cropOffsetRight = m_pcCfg->getCropOffsetRight(); + int cropOffsetBottom = m_pcCfg->getCropOffsetBottom(); + + int width = m_pcCfg->getSourceWidth() - cropOffsetLeft + cropOffsetRight; + int height = m_pcCfg->getSourceHeight() - cropOffsetTop + cropOffsetBottom; + + m_ppcFrameOrg[0] = new hdrtoolslib::Frame(width, height, false, hdrtoolslib::CM_YCbCr, colorPrimaries, chFmt, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0); + m_ppcFrameRec[0] = new hdrtoolslib::Frame(width, height, false, hdrtoolslib::CM_YCbCr, colorPrimaries, chFmt, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0); + + m_ppcFrameOrg[1] = new 
hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], false, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0); + m_ppcFrameRec[1] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], false, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0); // 420 to 444 conversion + + m_ppcFrameOrg[2] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0); + m_ppcFrameRec[2] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0); // 444 to Float conversion + + m_ppcFrameOrg[3] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0); + m_ppcFrameRec[3] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0); // YCbCr to RGB conversion + + m_ppcFrameOrg[4] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_NULL, 0); + m_ppcFrameRec[4] = new 
hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_NULL, 0); // Inverse Transfer Function + + m_pcFrameFormat = new hdrtoolslib::FrameFormat(); + m_pcFrameFormat->m_isFloat = true; + m_pcFrameFormat->m_chromaFormat = hdrtoolslib::CF_UNKNOWN; + m_pcFrameFormat->m_colorSpace = hdrtoolslib::CM_RGB; + m_pcFrameFormat->m_colorPrimaries = hdrtoolslib::CP_2020; + m_pcFrameFormat->m_sampleRange = hdrtoolslib::SR_UNKNOWN; + + m_pcConvertFormat = hdrtoolslib::ConvertColorFormat::create(width, height, chFmt, hdrtoolslib::CF_444, chromaUpFilter, chromaLocation, chromaLocation); + m_pcConvertIQuantize = hdrtoolslib::Convert::create(&m_ppcFrameOrg[1]->m_format, &m_ppcFrameOrg[2]->m_format); + m_pcColorTransform = hdrtoolslib::ColorTransform::create(m_ppcFrameOrg[2]->m_colorSpace, m_ppcFrameOrg[2]->m_colorPrimaries, m_ppcFrameOrg[3]->m_colorSpace, m_ppcFrameOrg[3]->m_colorPrimaries, true, 1); + m_pcDistortionDeltaE = new hdrtoolslib::DistortionMetricDeltaE(m_pcFrameFormat, false, maxSampleValue, whitePointDeltaE, 1); + m_pcTransferFct = hdrtoolslib::TransferFunction::create(hdrtoolslib::TF_PQ, true, (float) maxSampleValue, 0, 0.0, 1.0, enableTFunctionLUT); + } +#endif } -#if HEVC_VPS int EncGOP::xWriteVPS (AccessUnit &accessUnit, const VPS *vps) { OutputNALUnit nalu(NAL_UNIT_VPS); m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + CHECK( nalu.m_temporalId, "The value of TemporalId of VPS NAL units shall be equal to 0" ); m_HLSWriter->codeVPS( vps ); accessUnit.push_back(new NALUnitEBSP(nalu)); return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; } -#endif -int EncGOP::xWriteSPS (AccessUnit &accessUnit, const SPS *sps) +int EncGOP::xWriteDPS (AccessUnit &accessUnit, const DPS *dps) +{ + if (dps->getDecodingParameterSetId() !=0) + { + OutputNALUnit nalu(NAL_UNIT_DPS); + 
m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + CHECK( nalu.m_temporalId, "The value of TemporalId of DPS NAL units shall be equal to 0" ); + m_HLSWriter->codeDPS( dps ); + accessUnit.push_back(new NALUnitEBSP(nalu)); + return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; + } + else + { + return 0; + } +} + +int EncGOP::xWriteSPS( AccessUnit &accessUnit, const SPS *sps, const int layerId ) { OutputNALUnit nalu(NAL_UNIT_SPS); m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + nalu.m_nuhLayerId = layerId; + CHECK( nalu.m_temporalId, "The value of TemporalId of SPS NAL units shall be equal to 0" ); m_HLSWriter->codeSPS( sps ); accessUnit.push_back(new NALUnitEBSP(nalu)); return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; } -int EncGOP::xWritePPS (AccessUnit &accessUnit, const PPS *pps) +int EncGOP::xWritePPS( AccessUnit &accessUnit, const PPS *pps, const SPS *sps, const int layerId ) { OutputNALUnit nalu(NAL_UNIT_PPS); m_HLSWriter->setBitstream( &nalu.m_Bitstream ); - m_HLSWriter->codePPS( pps ); + nalu.m_nuhLayerId = layerId; + CHECK( nalu.m_temporalId < accessUnit.temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" ); + m_HLSWriter->codePPS( pps, sps ); accessUnit.push_back(new NALUnitEBSP(nalu)); return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; } -int EncGOP::xWriteAPS(AccessUnit &accessUnit, APS *aps) +int EncGOP::xWriteAPS( AccessUnit &accessUnit, APS *aps, const int layerId, const bool isPrefixNUT ) { - OutputNALUnit nalu(NAL_UNIT_APS); + OutputNALUnit nalu( isPrefixNUT ? 
NAL_UNIT_PREFIX_APS : NAL_UNIT_SUFFIX_APS ); m_HLSWriter->setBitstream(&nalu.m_Bitstream); + nalu.m_nuhLayerId = layerId; + nalu.m_temporalId = aps->getTemporalId(); + aps->setLayerId( layerId ); + CHECK( nalu.m_temporalId < accessUnit.temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" ); m_HLSWriter->codeAPS(aps); accessUnit.push_back(new NALUnitEBSP(nalu)); return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; } -int EncGOP::xWriteParameterSets (AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst) +int EncGOP::xWriteParameterSets( AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst ) { int actualTotalBits = 0; -#if HEVC_VPS - if (bSeqFirst) + if( bSeqFirst ) { - actualTotalBits += xWriteVPS(accessUnit, m_pcEncLib->getVPS()); - } -#endif - if (m_pcEncLib->SPSNeedsWriting(slice->getSPS()->getSPSId())) // Note this assumes that all changes to the SPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer). - { - CHECK(!(bSeqFirst), "Unspecified error"); // Implementations that use more than 1 SPS need to be aware of activation issues. - actualTotalBits += xWriteSPS(accessUnit, slice->getSPS()); + if (slice->getSPS()->getVPSId() != 0) + { + actualTotalBits += xWriteVPS(accessUnit, m_pcEncLib->getVPS()); + } + actualTotalBits += xWriteDPS( accessUnit, m_pcEncLib->getDPS() ); + + if( m_pcEncLib->SPSNeedsWriting( slice->getSPS()->getSPSId() ) ) // Note this assumes that all changes to the SPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer). + { + CHECK( !( bSeqFirst ), "Unspecified error" ); // Implementations that use more than 1 SPS need to be aware of activation issues. 
+ actualTotalBits += xWriteSPS( accessUnit, slice->getSPS(), m_pcEncLib->getLayerId() ); + } } - if (m_pcEncLib->PPSNeedsWriting(slice->getPPS()->getPPSId())) // Note this assumes that all changes to the PPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer). + + if( m_pcEncLib->PPSNeedsWriting( slice->getPPS()->getPPSId() ) ) // Note this assumes that all changes to the PPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer). { - actualTotalBits += xWritePPS(accessUnit, slice->getPPS()); + actualTotalBits += xWritePPS( accessUnit, slice->getPPS(), slice->getSPS(), m_pcEncLib->getLayerId() ); } return actualTotalBits; } +int EncGOP::xWritePicHeader( AccessUnit &accessUnit, PicHeader *picHeader ) +{ + OutputNALUnit nalu(NAL_UNIT_PH); + m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + nalu.m_temporalId = accessUnit.temporalId; + nalu.m_nuhLayerId = m_pcEncLib->getLayerId(); + m_HLSWriter->codePictureHeader( picHeader ); + accessUnit.push_back(new NALUnitEBSP(nalu)); + return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8; +} + void EncGOP::xWriteAccessUnitDelimiter (AccessUnit &accessUnit, Slice *slice) { AUDWriter audWriter; OutputNALUnit nalu(NAL_UNIT_ACCESS_UNIT_DELIMITER); - + nalu.m_temporalId = slice->getTLayer(); + int vpsId = slice->getSPS()->getVPSId(); + if (vpsId == 0) + { + nalu.m_nuhLayerId = 0; + } + else + { + nalu.m_nuhLayerId = slice->getVPS()->getLayerId(0); + } + CHECK( nalu.m_temporalId != accessUnit.temporalId, "TemporalId shall be equal to the TemporalId of the AU containing the NAL unit" ); int picType = slice->isIntra() ? 0 : (slice->isInterP() ? 
1 : 2); audWriter.codeAUD(nalu.m_Bitstream, picType); @@ -280,8 +435,8 @@ void EncGOP::xWriteSEI (NalUnitType naluType, SEIMessages& seiMessages, AccessUn { return; } - OutputNALUnit nalu(naluType, temporalId); - m_seiWriter.writeSEImessages(nalu.m_Bitstream, seiMessages, sps, false); + OutputNALUnit nalu( naluType, m_pcEncLib->getLayerId(), temporalId ); + m_seiWriter.writeSEImessages(nalu.m_Bitstream, seiMessages, sps, *m_HRD, false, temporalId); auPos = accessUnit.insert(auPos, new NALUnitEBSP(nalu)); auPos++; } @@ -297,8 +452,8 @@ void EncGOP::xWriteSEISeparately (NalUnitType naluType, SEIMessages& seiMessages { SEIMessages tmpMessages; tmpMessages.push_back(*sei); - OutputNALUnit nalu(naluType, temporalId); - m_seiWriter.writeSEImessages(nalu.m_Bitstream, tmpMessages, sps, false); + OutputNALUnit nalu( naluType, m_pcEncLib->getLayerId(), temporalId ); + m_seiWriter.writeSEImessages(nalu.m_Bitstream, tmpMessages, sps, *m_HRD, false, temporalId); auPos = accessUnit.insert(auPos, new NALUnitEBSP(nalu)); auPos++; } @@ -323,9 +478,8 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI while ( (itNalu!=accessUnit.end())&& ( (*itNalu)->m_nalUnitType==NAL_UNIT_ACCESS_UNIT_DELIMITER -#if HEVC_VPS || (*itNalu)->m_nalUnitType==NAL_UNIT_VPS -#endif + || (*itNalu)->m_nalUnitType==NAL_UNIT_DPS || (*itNalu)->m_nalUnitType==NAL_UNIT_SPS || (*itNalu)->m_nalUnitType==NAL_UNIT_PPS )) @@ -341,11 +495,13 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI #endif // The case that a specific SEI is not present is handled in xWriteSEI (empty list) +#if HEVC_SEI // Active parameter sets SEI must always be the first SEI currentMessages = extractSeisByType(localMessages, SEI::ACTIVE_PARAMETER_SETS); CHECK(!(currentMessages.size() <= 1), "Unspecified error"); xWriteSEI(NAL_UNIT_PREFIX_SEI, currentMessages, accessUnit, itNalu, temporalId, sps); xClearSEIs(currentMessages, !testWrite); +#endif // Buffering period SEI must 
always be following active parameter sets currentMessages = extractSeisByType(localMessages, SEI::BUFFERING_PERIOD); @@ -371,10 +527,12 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI xClearSEIs(currentMessages, !testWrite); } +#if HEVC_SEI // Scalable nesting SEI must always be the following DU info currentMessages = extractSeisByType(localMessages, SEI::SCALABLE_NESTING); xWriteSEISeparately(NAL_UNIT_PREFIX_SEI, currentMessages, accessUnit, itNalu, temporalId, sps); xClearSEIs(currentMessages, !testWrite); +#endif // And finally everything else one by one xWriteSEISeparately(NAL_UNIT_PREFIX_SEI, localMessages, accessUnit, itNalu, temporalId, sps); @@ -399,7 +557,7 @@ void EncGOP::xWriteLeadingSEIMessages (SEIMessages& seiMessages, SEIMessages& du // update Timing and DU info SEI xUpdateDuData(testAU, duData); xUpdateTimingSEI(picTiming, duData, sps); - xUpdateDuInfoSEI(duInfoSeiMessages, picTiming); + xUpdateDuInfoSEI(duInfoSeiMessages, picTiming, sps->getMaxTLayers()); // actual writing xWriteLeadingSEIOrdered(seiMessages, duInfoSeiMessages, accessUnit, temporalId, sps, false); @@ -416,9 +574,7 @@ void EncGOP::xWriteTrailingSEIMessages (SEIMessages& seiMessages, AccessUnit &ac void EncGOP::xWriteDuSEIMessages (SEIMessages& duInfoSeiMessages, AccessUnit &accessUnit, int temporalId, const SPS *sps, std::deque<DUData> &duData) { - const HRD *hrd = sps->getVuiParameters()->getHrdParameters(); - - if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getSubPicCpbParamsPresentFlag() ) + if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && m_HRD->getBufferingPeriodSEI()->m_decodingUnitCpbParamsInPicTimingSeiFlag ) { int naluIdx = 0; AccessUnit::iterator nalu = accessUnit.begin(); @@ -462,111 +618,135 @@ void EncGOP::xCreateIRAPLeadingSEIMessages (SEIMessages& seiMessages, const SPS { OutputNALUnit nalu(NAL_UNIT_PREFIX_SEI); - if(m_pcCfg->getActiveParameterSetsSEIEnabled()) - { - SEIActiveParameterSets *sei = new SEIActiveParameterSets; 
-#if HEVC_VPS - m_seiEncoder.initSEIActiveParameterSets (sei, m_pcCfg->getVPS(), sps); -#else - m_seiEncoder.initSEIActiveParameterSets(sei, sps); -#endif - seiMessages.push_back(sei); - } - if(m_pcCfg->getFramePackingArrangementSEIEnabled()) { SEIFramePacking *sei = new SEIFramePacking; m_seiEncoder.initSEIFramePacking (sei, m_iNumPicCoded); seiMessages.push_back(sei); } - - if(m_pcCfg->getSegmentedRectFramePackingArrangementSEIEnabled()) +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI + if(m_pcCfg->getSEIAlternativeTransferCharacteristicsSEIEnable()) + { + SEIAlternativeTransferCharacteristics *seiAlternativeTransferCharacteristics = new SEIAlternativeTransferCharacteristics; + m_seiEncoder.initSEIAlternativeTransferCharacteristics(seiAlternativeTransferCharacteristics); + seiMessages.push_back(seiAlternativeTransferCharacteristics); + } +#endif + if (m_pcCfg->getErpSEIEnabled()) { - SEISegmentedRectFramePacking *sei = new SEISegmentedRectFramePacking; - m_seiEncoder.initSEISegmentedRectFramePacking(sei); + SEIEquirectangularProjection *sei = new SEIEquirectangularProjection; + m_seiEncoder.initSEIErp(sei); seiMessages.push_back(sei); } - if (m_pcCfg->getDisplayOrientationSEIAngle()) + if (m_pcCfg->getSphereRotationSEIEnabled()) { - SEIDisplayOrientation *sei = new SEIDisplayOrientation; - m_seiEncoder.initSEIDisplayOrientation(sei); + SEISphereRotation *sei = new SEISphereRotation; + m_seiEncoder.initSEISphereRotation(sei); seiMessages.push_back(sei); } - if(m_pcCfg->getToneMappingInfoSEIEnabled()) + if (m_pcCfg->getOmniViewportSEIEnabled()) { - SEIToneMappingInfo *sei = new SEIToneMappingInfo; - m_seiEncoder.initSEIToneMappingInfo (sei); + SEIOmniViewport *sei = new SEIOmniViewport; + m_seiEncoder.initSEIOmniViewport(sei); seiMessages.push_back(sei); } - -#if HEVC_TILES_WPP - if(m_pcCfg->getTMCTSSEIEnabled()) + if (m_pcCfg->getRwpSEIEnabled()) + { + SEIRegionWisePacking *seiRegionWisePacking = new SEIRegionWisePacking; + 
m_seiEncoder.initSEIRegionWisePacking(seiRegionWisePacking); + seiMessages.push_back(seiRegionWisePacking); + } + if (m_pcCfg->getGcmpSEIEnabled()) { - SEITempMotionConstrainedTileSets *sei = new SEITempMotionConstrainedTileSets; - m_seiEncoder.initSEITempMotionConstrainedTileSets(sei, pps); + SEIGeneralizedCubemapProjection *sei = new SEIGeneralizedCubemapProjection; + m_seiEncoder.initSEIGcmp(sei); seiMessages.push_back(sei); } -#endif - - if(m_pcCfg->getTimeCodeSEIEnabled()) + if (m_pcCfg->getSubpicureLevelInfoSEIEnabled()) { - SEITimeCode *seiTimeCode = new SEITimeCode; - m_seiEncoder.initSEITimeCode(seiTimeCode); - seiMessages.push_back(seiTimeCode); + SEISubpicureLevelInfo *seiSubpicureLevelInfo = new SEISubpicureLevelInfo; + m_seiEncoder.initSEISubpictureLevelInfo(seiSubpicureLevelInfo, sps); + seiMessages.push_back(seiSubpicureLevelInfo); } - - if(m_pcCfg->getKneeSEIEnabled()) + if (m_pcCfg->getSampleAspectRatioInfoSEIEnabled()) + { + SEISampleAspectRatioInfo *seiSampleAspectRatioInfo = new SEISampleAspectRatioInfo; + m_seiEncoder.initSEISampleAspectRatioInfo(seiSampleAspectRatioInfo); + seiMessages.push_back(seiSampleAspectRatioInfo); + } + // film grain + if (m_pcCfg->getFilmGrainCharactersticsSEIEnabled()) { - SEIKneeFunctionInfo *sei = new SEIKneeFunctionInfo; - m_seiEncoder.initSEIKneeFunctionInfo(sei); + SEIFilmGrainCharacteristics *sei = new SEIFilmGrainCharacteristics; + m_seiEncoder.initSEIFilmGrainCharacteristics(sei); seiMessages.push_back(sei); } - if(m_pcCfg->getMasteringDisplaySEI().colourVolumeSEIEnabled) + // mastering display colour volume + if (m_pcCfg->getMasteringDisplaySEI().colourVolumeSEIEnabled) { - const SEIMasteringDisplay &seiCfg=m_pcCfg->getMasteringDisplaySEI(); SEIMasteringDisplayColourVolume *sei = new SEIMasteringDisplayColourVolume; - sei->values = seiCfg; + m_seiEncoder.initSEIMasteringDisplayColourVolume(sei); seiMessages.push_back(sei); } - if(m_pcCfg->getChromaResamplingFilterHintEnabled()) + + // content light level + 
if (m_pcCfg->getCLLSEIEnabled()) { - SEIChromaResamplingFilterHint *seiChromaResamplingFilterHint = new SEIChromaResamplingFilterHint; - m_seiEncoder.initSEIChromaResamplingFilterHint(seiChromaResamplingFilterHint, m_pcCfg->getChromaResamplingHorFilterIdc(), m_pcCfg->getChromaResamplingVerFilterIdc()); - seiMessages.push_back(seiChromaResamplingFilterHint); + SEIContentLightLevelInfo *seiCLL = new SEIContentLightLevelInfo; + m_seiEncoder.initSEIContentLightLevel(seiCLL); + seiMessages.push_back(seiCLL); } -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI - if(m_pcCfg->getSEIAlternativeTransferCharacteristicsSEIEnable()) + + // ambient viewing environment + if (m_pcCfg->getAmbientViewingEnvironmentSEIEnabled()) { - SEIAlternativeTransferCharacteristics *seiAlternativeTransferCharacteristics = new SEIAlternativeTransferCharacteristics; - m_seiEncoder.initSEIAlternativeTransferCharacteristics(seiAlternativeTransferCharacteristics); - seiMessages.push_back(seiAlternativeTransferCharacteristics); + SEIAmbientViewingEnvironment *seiAVE = new SEIAmbientViewingEnvironment; + m_seiEncoder.initSEIAmbientViewingEnvironment(seiAVE); + seiMessages.push_back(seiAVE); + } + + // content colour volume + if (m_pcCfg->getCcvSEIEnabled()) + { + SEIContentColourVolume *seiContentColourVolume = new SEIContentColourVolume; + m_seiEncoder.initSEIContentColourVolume(seiContentColourVolume); + seiMessages.push_back(seiContentColourVolume); } -#endif } void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, Slice *slice) { - if( ( m_pcCfg->getBufferingPeriodSEIEnabled() ) && ( slice->getSliceType() == I_SLICE ) && - ( slice->getSPS()->getVuiParametersPresentFlag() ) && - ( ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getNalHrdParametersPresentFlag() ) - || ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) ) + if ((m_pcCfg->getBufferingPeriodSEIEnabled()) && 
(slice->isIRAP() || slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && + ( slice->getSPS()->getHrdParametersPresentFlag() ) ) { SEIBufferingPeriod *bufferingPeriodSEI = new SEIBufferingPeriod(); - m_seiEncoder.initSEIBufferingPeriod(bufferingPeriodSEI, slice); + bool noLeadingPictures = ( (slice->getNalUnitType()!= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (slice->getNalUnitType()!= NAL_UNIT_CODED_SLICE_CRA) )?(true):(false); + m_seiEncoder.initSEIBufferingPeriod(bufferingPeriodSEI,noLeadingPictures); + m_HRD->setBufferingPeriodSEI(bufferingPeriodSEI); seiMessages.push_back(bufferingPeriodSEI); m_bufferingPeriodSEIPresentInAU = true; +#if HEVC_SEI if (m_pcCfg->getScalableNestingSEIEnabled()) { SEIBufferingPeriod *bufferingPeriodSEIcopy = new SEIBufferingPeriod(); bufferingPeriodSEI->copyTo(*bufferingPeriodSEIcopy); nestedSeiMessages.push_back(bufferingPeriodSEIcopy); } +#endif } + if (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && slice->isDRAP()) + { + SEIDependentRAPIndication *dependentRAPIndicationSEI = new SEIDependentRAPIndication(); + m_seiEncoder.initSEIDependentRAPIndication(dependentRAPIndicationSEI); + seiMessages.push_back(dependentRAPIndicationSEI); + } + +#if HEVC_SEI if (picInGOP ==0 && m_pcCfg->getSOPDescriptionSEIEnabled() ) // write SOP description SEI (if enabled) at the beginning of GOP { SEISOPDescription* sopDescriptionSEI = new SEISOPDescription(); @@ -617,8 +797,10 @@ void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessage delete seiColourRemappingInfo; } } +#endif } +#if HEVC_SEI void EncGOP::xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& nestedSeiMessages) { SEIMessages tmpMessages; @@ -633,31 +815,222 @@ void EncGOP::xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& n tmpMessages.clear(); } } +#endif + +void EncGOP::xCreateFrameFieldInfoSEI (SEIMessages& seiMessages, Slice *slice, bool isField) +{ + if (m_pcCfg->getFrameFieldInfoSEIEnabled()) + { + SEIFrameFieldInfo 
*frameFieldInfoSEI = new SEIFrameFieldInfo(); + + // encode only very basic information. if more feature are supported, this should be moved to SEIEncoder + frameFieldInfoSEI->m_fieldPicFlag = isField; + if (isField) + { + frameFieldInfoSEI->m_bottomFieldFlag = !slice->getPic()->topField; + } + seiMessages.push_back(frameFieldInfoSEI); + } +} + void EncGOP::xCreatePictureTimingSEI (int IRAPGOPid, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, SEIMessages& duInfoSeiMessages, Slice *slice, bool isField, std::deque<DUData> &duData) { - const VUI *vui = slice->getSPS()->getVuiParameters(); - const HRD *hrd = vui->getHrdParameters(); + // Picture timing depends on buffering period. When either of those is not disabled, + // initialization would fail. Needs more cleanup after DU timing is integrated. + if (!(m_pcCfg->getPictureTimingSEIEnabled() && m_pcCfg->getBufferingPeriodSEIEnabled())) + { + return; + } + + const HRDParameters *hrd = slice->getSPS()->getHrdParameters(); // update decoding unit parameters - if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) && - ( slice->getSPS()->getVuiParametersPresentFlag() ) && - ( hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() ) ) + if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) ) { int picSptDpbOutputDuDelay = 0; SEIPictureTiming *pictureTimingSEI = new SEIPictureTiming(); // DU parameters - if( hrd->getSubPicCpbParamsPresentFlag() ) + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) { uint32_t numDU = (uint32_t) duData.size(); pictureTimingSEI->m_numDecodingUnitsMinus1 = ( numDU - 1 ); pictureTimingSEI->m_duCommonCpbRemovalDelayFlag = false; pictureTimingSEI->m_numNalusInDuMinus1.resize( numDU ); - pictureTimingSEI->m_duCpbRemovalDelayMinus1.resize( numDU ); + const uint32_t maxNumSubLayers = slice->getSPS()->getMaxTLayers(); + pictureTimingSEI->m_duCpbRemovalDelayMinus1.resize( numDU * 
maxNumSubLayers ); + } + const uint32_t cpbRemovalDelayLegth = m_HRD->getBufferingPeriodSEI()->m_cpbRemovalDelayLength; + const uint32_t maxNumSubLayers = slice->getSPS()->getMaxTLayers(); + pictureTimingSEI->m_auCpbRemovalDelay[maxNumSubLayers-1] = std::min<int>(std::max<int>(1, m_totalCoded[maxNumSubLayers-1] - m_lastBPSEI[maxNumSubLayers-1]), static_cast<int>(pow(2, static_cast<double>(cpbRemovalDelayLegth)))); // Syntax element signalled as minus, hence the . + CHECK( (m_totalCoded[maxNumSubLayers-1] - m_lastBPSEI[maxNumSubLayers-1]) > pow(2, static_cast<double>(cpbRemovalDelayLegth)), " cpbRemovalDelayLegth too small for m_auCpbRemovalDelay[pt_max_sub_layers_minus1] at picture timing SEI " ); + const uint32_t temporalId = slice->getTLayer(); + for( int i = temporalId ; i < maxNumSubLayers - 1 ; i ++ ) + { + int indexWithinGOP = (m_totalCoded[maxNumSubLayers - 1] - m_lastBPSEI[maxNumSubLayers - 1]) % m_pcCfg->getGOPSize(); + pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[i] = true; + if( ((m_rapWithLeading == true) && (indexWithinGOP == 0)) || (m_totalCoded[maxNumSubLayers - 1] == 0) || m_bufferingPeriodSEIPresentInAU) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] = false; + } + else + { + pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] = m_HRD->getBufferingPeriodSEI()->m_cpbRemovalDelayDeltasPresentFlag; + } + if( pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] ) + { + if( m_rapWithLeading == false ) + { + switch (m_pcCfg->getGOPSize()) + { + case 8: + { + if((indexWithinGOP == 1 && i == 2)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0; + } + else if((indexWithinGOP == 2 && i == 2) || (indexWithinGOP == 6 && i == 2)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1; + } + else if((indexWithinGOP == 1 && i == 1) || (indexWithinGOP == 3 && i == 2)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2; + } + else if(indexWithinGOP == 2 && i == 1) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3; + } + 
else if(indexWithinGOP == 1 && i == 0) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4; + } + else + { + THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size"); + } + } + break; + case 16: + { + if((indexWithinGOP == 1 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0; + } + else if((indexWithinGOP == 2 && i == 3) || (indexWithinGOP == 10 && i == 3) || (indexWithinGOP == 14 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1; + } + else if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 3 && i == 3) || (indexWithinGOP == 7 && i == 3) || (indexWithinGOP == 11 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2; + } + else if(indexWithinGOP == 4 && i == 3) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3; + } + else if((indexWithinGOP == 2 && i == 2) || (indexWithinGOP == 10 && i == 2)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4; + } + else if(indexWithinGOP == 1 && i == 1) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 5; + } + else if(indexWithinGOP == 3 && i == 2) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 6; + } + else if(indexWithinGOP == 2 && i == 1) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 7; + } + else if(indexWithinGOP == 1 && i == 0) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 8; + } + else + { + THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size"); + } + } + break; + default: + { + THROW("m_cpbRemovalDelayDeltaIdx not supported for the current GOP size"); + } + break; + } + } + else + { + switch (m_pcCfg->getGOPSize()) + { + case 8: + { + if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 5 && i == 2)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0; + } + else if(indexWithinGOP == 2 && i == 2) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1; + } + else if(indexWithinGOP == 1 && i == 1) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2; + } 
+ else + { + THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size"); + } + } + break; + case 16: + { + if((indexWithinGOP == 1 && i == 3) || (indexWithinGOP == 9 && i == 3) || (indexWithinGOP == 13 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0; + } + else if((indexWithinGOP == 2 && i == 3) || (indexWithinGOP == 6 && i == 3) || (indexWithinGOP == 10 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1; + } + else if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 9 && i == 2) || (indexWithinGOP == 3 && i == 3)) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2; + } + else if(indexWithinGOP == 2 && i == 2) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3; + } + else if(indexWithinGOP == 1 && i == 1) + { + pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4; + } + else + { + THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size"); + } + } + break; + default: + { + THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size"); + } + break; + } + } + } + else + { + int scaledDistToBuffPeriod = (m_totalCoded[i] - m_lastBPSEI[i]) * static_cast<int>(pow(2, static_cast<double>(maxNumSubLayers - 1 - i))); + pictureTimingSEI->m_auCpbRemovalDelay[i] = std::min<int>(std::max<int>(1, scaledDistToBuffPeriod), static_cast<int>(pow(2, static_cast<double>(cpbRemovalDelayLegth)))); // Syntax element signalled as minus, hence the . + CHECK( (scaledDistToBuffPeriod) > pow(2, static_cast<double>(cpbRemovalDelayLegth)), " cpbRemovalDelayLegth too small for m_auCpbRemovalDelay[i] at picture timing SEI " ); + } } - pictureTimingSEI->m_auCpbRemovalDelay = std::min<int>(std::max<int>(1, m_totalCoded - m_lastBPSEI), static_cast<int>(pow(2, static_cast<double>(hrd->getCpbRemovalDelayLengthMinus1()+1)))); // Syntax element signalled as minus, hence the . 
- pictureTimingSEI->m_picDpbOutputDelay = slice->getSPS()->getNumReorderPics(slice->getSPS()->getMaxTLayers()-1) + slice->getPOC() - m_totalCoded; + pictureTimingSEI->m_picDpbOutputDelay = slice->getSPS()->getNumReorderPics(slice->getSPS()->getMaxTLayers()-1) + slice->getPOC() - m_totalCoded[maxNumSubLayers-1]; if(m_pcCfg->getEfficientFieldIRAPEnabled() && IRAPGOPid > 0 && IRAPGOPid < m_iGopSize) { // if pictures have been swapped there is likely one more picture delay on their tid. Very rough approximation @@ -671,93 +1044,39 @@ void EncGOP::xCreatePictureTimingSEI (int IRAPGOPid, SEIMessages& seiMessages, } if (m_bufferingPeriodSEIPresentInAU) { - m_lastBPSEI = m_totalCoded; - } - - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - int i; - uint64_t ui64Tmp; - uint32_t uiPrev = 0; - uint32_t numDU = ( pictureTimingSEI->m_numDecodingUnitsMinus1 + 1 ); - std::vector<uint32_t> &rDuCpbRemovalDelayMinus1 = pictureTimingSEI->m_duCpbRemovalDelayMinus1; - uint32_t maxDiff = ( hrd->getTickDivisorMinus2() + 2 ) - 1; - - for( i = 0; i < numDU; i ++ ) - { - pictureTimingSEI->m_numNalusInDuMinus1[ i ] = ( i == 0 ) ? 
( duData[i].accumNalsDU - 1 ) : ( duData[i].accumNalsDU- duData[i-1].accumNalsDU - 1 ); - } - - if( numDU == 1 ) + for( int i = temporalId ; i < maxNumSubLayers ; i ++ ) { - rDuCpbRemovalDelayMinus1[ 0 ] = 0; /* don't care */ + m_lastBPSEI[i] = m_totalCoded[i]; } - else + if( (slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL)||(slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) ) { - rDuCpbRemovalDelayMinus1[ numDU - 1 ] = 0;/* by definition */ - uint32_t tmp = 0; - uint32_t accum = 0; - - for( i = ( numDU - 2 ); i >= 0; i -- ) - { - ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); - if( (uint32_t)ui64Tmp > maxDiff ) - { - tmp ++; - } - } - uiPrev = 0; - - uint32_t flag = 0; - for( i = ( numDU - 2 ); i >= 0; i -- ) - { - flag = 0; - ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); - - if( (uint32_t)ui64Tmp > maxDiff ) - { - if(uiPrev >= maxDiff - tmp) - { - ui64Tmp = uiPrev + 1; - flag = 1; - } - else ui64Tmp = maxDiff - tmp + 1; - } - rDuCpbRemovalDelayMinus1[ i ] = (uint32_t)ui64Tmp - uiPrev - 1; - if( (int)rDuCpbRemovalDelayMinus1[ i ] < 0 ) - { - rDuCpbRemovalDelayMinus1[ i ] = 0; - } - else if (tmp > 0 && flag == 1) - { - tmp --; - } - accum += rDuCpbRemovalDelayMinus1[ i ] + 1; - uiPrev = accum; - } + m_rapWithLeading = true; } } + if( m_pcCfg->getPictureTimingSEIEnabled() ) { - pictureTimingSEI->m_picStruct = (isField && slice->getPic()->topField)? 1 : isField? 
2 : 0; seiMessages.push_back(pictureTimingSEI); +#if HEVC_SEI if ( m_pcCfg->getScalableNestingSEIEnabled() ) // put picture timing SEI into scalable nesting SEI { SEIPictureTiming *pictureTimingSEIcopy = new SEIPictureTiming(); pictureTimingSEI->copyTo(*pictureTimingSEIcopy); nestedSeiMessages.push_back(pictureTimingSEIcopy); } +#endif } - if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getSubPicCpbParamsPresentFlag() ) + if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) { for( int i = 0; i < ( pictureTimingSEI->m_numDecodingUnitsMinus1 + 1 ); i ++ ) { SEIDecodingUnitInfo *duInfoSEI = new SEIDecodingUnitInfo(); duInfoSEI->m_decodingUnitIdx = i; - duInfoSEI->m_duSptCpbRemovalDelay = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i] + 1; + for( int j = temporalId; j <= maxNumSubLayers; j++ ) + duInfoSEI->m_duSptCpbRemovalDelayIncrement[j] = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i*maxNumSubLayers+j] + 1; duInfoSEI->m_dpbOutputDuDelayPresentFlag = false; duInfoSEI->m_picSptDpbOutputDuDelay = picSptDpbOutputDuDelay; @@ -815,9 +1134,8 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD { return; } - const VUI *vui = sps->getVuiParameters(); - const HRD *hrd = vui->getHrdParameters(); - if( hrd->getSubPicCpbParamsPresentFlag() ) + const HRDParameters *hrd = sps->getHrdParameters(); + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) { int i; uint64_t ui64Tmp; @@ -826,6 +1144,10 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD std::vector<uint32_t> &rDuCpbRemovalDelayMinus1 = pictureTimingSEI->m_duCpbRemovalDelayMinus1; uint32_t maxDiff = ( hrd->getTickDivisorMinus2() + 2 ) - 1; + int maxNumSubLayers = sps->getMaxTLayers(); + for( int j = 0; j < maxNumSubLayers - 1; j++ ) + pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[j] = false; + for( i = 0; i < numDU; i ++ ) { pictureTimingSEI->m_numNalusInDuMinus1[ i ] = ( i == 0 ) ? 
( duData[i].accumNalsDU - 1 ) : ( duData[i].accumNalsDU- duData[i-1].accumNalsDU - 1 ); @@ -833,17 +1155,17 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD if( numDU == 1 ) { - rDuCpbRemovalDelayMinus1[ 0 ] = 0; /* don't care */ + rDuCpbRemovalDelayMinus1[ 0 + maxNumSubLayers - 1 ] = 0; /* don't care */ } else { - rDuCpbRemovalDelayMinus1[ numDU - 1 ] = 0;/* by definition */ + rDuCpbRemovalDelayMinus1[ (numDU - 1) * maxNumSubLayers + maxNumSubLayers - 1 ] = 0;/* by definition */ uint32_t tmp = 0; uint32_t accum = 0; for( i = ( numDU - 2 ); i >= 0; i -- ) { - ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); + ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( sps->getTimingInfo()->getTimeScale() / sps->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); if( (uint32_t)ui64Tmp > maxDiff ) { tmp ++; @@ -855,7 +1177,7 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD for( i = ( numDU - 2 ); i >= 0; i -- ) { flag = 0; - ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); + ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU - duData[i].accumBitsDU ) * ( sps->getTimingInfo()->getTimeScale() / sps->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) ); if( (uint32_t)ui64Tmp > maxDiff ) { @@ -866,22 +1188,22 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD } else ui64Tmp = maxDiff - tmp + 1; } - rDuCpbRemovalDelayMinus1[ i ] = (uint32_t)ui64Tmp - uiPrev - 1; - if( 
(int)rDuCpbRemovalDelayMinus1[ i ] < 0 ) + rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] = (uint32_t)ui64Tmp - uiPrev - 1; + if( (int)rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] < 0 ) { - rDuCpbRemovalDelayMinus1[ i ] = 0; + rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] = 0; } else if (tmp > 0 && flag == 1) { tmp --; } - accum += rDuCpbRemovalDelayMinus1[ i ] + 1; + accum += rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] + 1; uiPrev = accum; } } } } -void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI) +void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI, int maxSubLayers) { if (duInfoSeiMessages.empty() || (pictureTimingSEI == NULL)) { @@ -894,7 +1216,11 @@ void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming * { SEIDecodingUnitInfo *duInfoSEI = (SEIDecodingUnitInfo*) (*du); duInfoSEI->m_decodingUnitIdx = i; - duInfoSEI->m_duSptCpbRemovalDelay = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i] + 1; + for ( int j = 0; j < maxSubLayers; j++ ) + { + duInfoSEI->m_duiSubLayerDelaysPresentFlag[j] = pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[j]; + duInfoSEI->m_duSptCpbRemovalDelayIncrement[j] = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i*maxSubLayers+j] + 1; + } duInfoSEI->m_dpbOutputDuDelayPresentFlag = false; i++; } @@ -908,10 +1234,10 @@ cabac_zero_word_padding(Slice *const pcSlice, Picture *const pcPic, const std::s const int log2subWidthCxsubHeightC = (::getComponentScaleX(COMPONENT_Cb, format)+::getComponentScaleY(COMPONENT_Cb, format)); const int minCuWidth = pcPic->cs->pcv->minCUWidth; const int minCuHeight = pcPic->cs->pcv->minCUHeight; - const int paddedWidth = ((sps.getPicWidthInLumaSamples() + minCuWidth - 1) / minCuWidth) * minCuWidth; - const int paddedHeight= ((sps.getPicHeightInLumaSamples() + minCuHeight - 1) / minCuHeight) * minCuHeight; + 
const int paddedWidth = ( ( pcSlice->getPPS()->getPicWidthInLumaSamples() + minCuWidth - 1 ) / minCuWidth ) * minCuWidth; + const int paddedHeight = ( ( pcSlice->getPPS()->getPicHeightInLumaSamples() + minCuHeight - 1 ) / minCuHeight ) * minCuHeight; const int rawBits = paddedWidth * paddedHeight * - (sps.getBitDepth(CHANNEL_TYPE_LUMA) + 2*(sps.getBitDepth(CHANNEL_TYPE_CHROMA)>>log2subWidthCxsubHeightC)); + (sps.getBitDepth(CHANNEL_TYPE_LUMA) + ((2*sps.getBitDepth(CHANNEL_TYPE_CHROMA))>>log2subWidthCxsubHeightC)); const std::size_t threshold = (32/3)*numBytesInVclNalUnits + (rawBits/32); if (binCountsInNalUnits >= threshold) { @@ -985,11 +1311,7 @@ void EfficientFieldIRAPMapping::initialize(const bool isField, const int gopSize // check if POC corresponds to IRAP NalUnitType tmpUnitType = pEncGop->getNalUnitType(pocCurr, lastIDR, isField); -#if !JVET_M0101_HLS - if(tmpUnitType >= NAL_UNIT_CODED_SLICE_BLA_W_LP && tmpUnitType <= NAL_UNIT_CODED_SLICE_CRA) // if picture is an IRAP -#else if (tmpUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && tmpUnitType <= NAL_UNIT_CODED_SLICE_CRA) // if picture is an IRAP -#endif { if(pocCurr%2 == 0 && iGOPid < gopSize-1 && pCfg->getGOPEntry(iGOPid).m_POC == pCfg->getGOPEntry(iGOPid+1).m_POC-1) { // if top field and following picture in enc order is associated bottom field @@ -1074,71 +1396,11 @@ int EfficientFieldIRAPMapping::restoreGOPid(const int GOPid) } -#if X0038_LAMBDA_FROM_QP_CAPABILITY -static uint32_t calculateCollocatedFromL0Flag(const Slice *pSlice) -{ - const int refIdx = 0; // Zero always assumed - const Picture *refPicL0 = pSlice->getRefPic(REF_PIC_LIST_0, refIdx); - const Picture *refPicL1 = pSlice->getRefPic(REF_PIC_LIST_1, refIdx); - return refPicL0->slices[0]->getSliceQp() > refPicL1->slices[0]->getSliceQp(); -} -#else -static uint32_t calculateCollocatedFromL1Flag(EncCfg *pCfg, const int GOPid, const int gopSize) +static void +printHash(const HashType hashType, const std::string &digestStr) { - int iCloseLeft=1, 
iCloseRight=-1; - for(int i = 0; i<pCfg->getGOPEntry(GOPid).m_numRefPics; i++) - { - int iRef = pCfg->getGOPEntry(GOPid).m_referencePics[i]; - if(iRef>0&&(iRef<iCloseRight||iCloseRight==-1)) - { - iCloseRight=iRef; - } - else if(iRef<0&&(iRef>iCloseLeft||iCloseLeft==1)) - { - iCloseLeft=iRef; - } - } - if(iCloseRight>-1) - { - iCloseRight=iCloseRight+pCfg->getGOPEntry(GOPid).m_POC-1; - } - if(iCloseLeft<1) - { - iCloseLeft=iCloseLeft+pCfg->getGOPEntry(GOPid).m_POC-1; - while(iCloseLeft<0) - { - iCloseLeft+=gopSize; - } - } - int iLeftQP=0, iRightQP=0; - for(int i=0; i<gopSize; i++) - { - if(pCfg->getGOPEntry(i).m_POC==(iCloseLeft%gopSize)+1) - { - iLeftQP= pCfg->getGOPEntry(i).m_QPOffset; - } - if (pCfg->getGOPEntry(i).m_POC==(iCloseRight%gopSize)+1) - { - iRightQP=pCfg->getGOPEntry(i).m_QPOffset; - } - } - if(iCloseRight>-1&&iRightQP<iLeftQP) - { - return 0; - } - else - { - return 1; - } -} -#endif - - -static void -printHash(const HashType hashType, const std::string &digestStr) -{ - const char *decodedPictureHashModeName; - switch (hashType) + const char *decodedPictureHashModeName; + switch (hashType) { case HASHTYPE_MD5: decodedPictureHashModeName = "MD5"; @@ -1295,8 +1557,7 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict // patch IDR-slice to CRA-Intra-slice pcPic->slices[ i ]->setNalUnitType ( slice0.getNalUnitType() ); pcPic->slices[ i ]->setLastIDR ( slice0.getLastIDR() ); - pcPic->slices[ i ]->setEnableTMVPFlag ( slice0.getEnableTMVPFlag() ); - if ( slice0.getEnableTMVPFlag() ) + if ( pcPic->cs->picHeader->getEnableTMVPFlag() ) { pcPic->slices[ i ]->setColFromL0Flag( slice0.getColFromL0Flag() ); pcPic->slices[ i ]->setColRefIdx ( slice0.getColRefIdx() ); @@ -1337,6 +1598,326 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict } } +void EncGOP::xPicInitHashME( Picture *pic, const PPS *pps, PicList &rcListPic ) +{ + if (! 
m_pcCfg->getUseHashME()) + { + return; + } + + PicList::iterator iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + Picture* refPic = *(iterPic++); + + if (refPic->poc != pic->poc && refPic->referenced) + { + if (!refPic->getHashMap()->isInitial()) + { + if (refPic->getPOC() == 0) + { + Pel* picSrc = refPic->getOrigBuf().get(COMPONENT_Y).buf; + int stridePic = refPic->getOrigBuf().get(COMPONENT_Y).stride; + int picWidth = pps->getPicWidthInLumaSamples(); + int picHeight = pps->getPicHeightInLumaSamples(); + int blockSize = 4; + int allNum = 0; + int simpleNum = 0; + for (int j = 0; j <= picHeight - blockSize; j += blockSize) + { + for (int i = 0; i <= picWidth - blockSize; i += blockSize) + { + Pel* curBlock = picSrc + j * stridePic + i; + bool isHorSame = true; + for (int m = 0; m < blockSize&&isHorSame; m++) + { + for (int n = 1; n < blockSize&&isHorSame; n++) + { + if (curBlock[m*stridePic] != curBlock[m*stridePic + n]) + { + isHorSame = false; + } + } + } + bool isVerSame = true; + for (int m = 1; m < blockSize&&isVerSame; m++) + { + for (int n = 0; n < blockSize&&isVerSame; n++) + { + if (curBlock[n] != curBlock[m*stridePic + n]) + { + isVerSame = false; + } + } + } + allNum++; + if (isHorSame || isVerSame) + { + simpleNum++; + } + } + } + + if (simpleNum < 0.3*allNum) + { + m_pcCfg->setUseHashME(false); + break; + } + } + refPic->addPictureToHashMapForInter(); + } + } + } +} + +void EncGOP::xPicInitRateControl(int &estimatedBits, int gopId, double &lambda, Picture *pic, Slice *slice) +{ + if ( !m_pcCfg->getUseRateCtrl() ) // TODO: does this work with multiple slices and slice-segments? 
+ { + return; + } + int frameLevel = m_pcRateCtrl->getRCSeq()->getGOPID2Level( gopId ); + if ( pic->slices[0]->isIRAP() ) + { + frameLevel = 0; + } + m_pcRateCtrl->initRCPic( frameLevel ); + estimatedBits = m_pcRateCtrl->getRCPic()->getTargetBits(); + +#if U0132_TARGET_BITS_SATURATION + if (m_pcRateCtrl->getCpbSaturationEnabled() && frameLevel != 0) + { + int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate(); + + // prevent overflow + if (estimatedCpbFullness - estimatedBits > (int)(m_pcRateCtrl->getCpbSize()*0.9f)) + { + estimatedBits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f); + } + + estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate(); + // prevent underflow +#if V0078_ADAPTIVE_LOWER_BOUND + if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound()) + { + estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound()); + } +#else + if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f)) + { + estimatedBits = std::max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f)); + } +#endif + + m_pcRateCtrl->getRCPic()->setTargetBits(estimatedBits); + } +#endif + + int sliceQP = m_pcCfg->getInitialQP(); + if ( ( slice->getPOC() == 0 && m_pcCfg->getInitialQP() > 0 ) || ( frameLevel == 0 && m_pcCfg->getForceIntraQP() ) ) // QP is specified + { + int NumberBFrames = ( m_pcCfg->getGOPSize() - 1 ); + double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)NumberBFrames ); + double dQPFactor = 0.57*dLambda_scale; + int SHIFT_QP = 12; + int bitdepth_luma_qp_scale = 6 * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 + - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); + double qp_temp = (double) sliceQP + bitdepth_luma_qp_scale - SHIFT_QP; + lambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); + } + else if ( frameLevel == 0 ) // intra case, but use the model + { + 
m_pcSliceEncoder->calCostSliceI(pic); // TODO: This only analyses the first slice segment - what about the others? + + if ( m_pcCfg->getIntraPeriod() != 1 ) // do not refine allocated bits for all intra case + { + int bits = m_pcRateCtrl->getRCSeq()->getLeftAverageBits(); + bits = m_pcRateCtrl->getRCPic()->getRefineBitsForIntra( bits ); + +#if U0132_TARGET_BITS_SATURATION + if (m_pcRateCtrl->getCpbSaturationEnabled() ) + { + int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate(); + + // prevent overflow + if (estimatedCpbFullness - bits > (int)(m_pcRateCtrl->getCpbSize()*0.9f)) + { + bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f); + } + + estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate(); + // prevent underflow +#if V0078_ADAPTIVE_LOWER_BOUND + if (estimatedCpbFullness - bits < m_pcRateCtrl->getRCPic()->getLowerBound()) + { + bits = estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound(); + } +#else + if (estimatedCpbFullness - bits < (int)(m_pcRateCtrl->getCpbSize()*0.1f)) + { + bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f); + } +#endif + } +#endif + + if ( bits < 200 ) + { + bits = 200; + } + m_pcRateCtrl->getRCPic()->setTargetBits( bits ); + } + + list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList(); + m_pcRateCtrl->getRCPic()->getLCUInitTargetBits(); + lambda = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, slice->isIRAP()); + sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture ); + } + else // normal case + { + list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList(); + lambda = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, slice->isIRAP()); + sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture ); + } + + sliceQP = Clip3( -slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, sliceQP ); + m_pcRateCtrl->getRCPic()->setPicEstQP( sliceQP ); + + 
m_pcSliceEncoder->resetQP( pic, sliceQP, lambda ); +} + +void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice) +{ + if (slice->getSPS()->getUseLmcs()) + { + const SliceType sliceType = slice->getSliceType(); + + m_pcReshaper->getReshapeCW()->rspTid = slice->getTLayer() + (slice->isIntra() ? 0 : 1); + m_pcReshaper->getReshapeCW()->rspSliceQP = slice->getSliceQp(); + + m_pcReshaper->setSrcReshaped(false); + m_pcReshaper->setRecReshaped(true); + + m_pcReshaper->getSliceReshaperInfo().chrResScalingOffset = m_pcCfg->getReshapeCSoffset(); + + if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) + { + m_pcReshaper->preAnalyzerHDR(pic, sliceType, m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree()); + } + else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG) + { + m_pcReshaper->preAnalyzerLMCS(pic, m_pcCfg->getReshapeSignalType(), sliceType, m_pcCfg->getReshapeCW()); + } + else + { + THROW("Reshaper for other signal currently not defined!"); + } + + if (sliceType == I_SLICE ) + { + if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) + { + m_pcReshaper->initLUTfromdQPModel(); + m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTableChromaMD(m_pcReshaper->getInvLUT()); + } + else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG) + { + if (m_pcReshaper->getReshapeFlag()) + { + m_pcReshaper->constructReshaperLMCS(); + m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight()); + } + } + else + { + THROW("Reshaper for other signal currently not defined!"); + } + + m_pcReshaper->setCTUFlag(false); + + //reshape original signal + if (m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper()) + { + pic->getOrigBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getFwdLUT()); + m_pcReshaper->setSrcReshaped(true); + 
m_pcReshaper->setRecReshaped(true); + } + } + else + { + if (!m_pcReshaper->getReshapeFlag()) + { + m_pcReshaper->setCTUFlag(false); + } + else + m_pcReshaper->setCTUFlag(true); + + m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(false); + + if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) + { + m_pcEncLib->getRdCost()->restoreReshapeLumaLevelToWeightTable(); + } + else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG) + { + int modIP = pic->getPOC() - pic->getPOC() / m_pcCfg->getReshapeCW().rspFpsToIp * m_pcCfg->getReshapeCW().rspFpsToIp; + if (m_pcReshaper->getReshapeFlag() && m_pcCfg->getReshapeCW().updateCtrl == 2 && modIP == 0) + { + m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(true); + m_pcReshaper->constructReshaperLMCS(); + m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight()); + } + } + else + { + THROW("Reshaper for other signal currently not defined!"); + } + } + + //set all necessary information in LMCS APS and picture header + picHeader->setLmcsEnabledFlag(m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper()); + picHeader->setLmcsChromaResidualScaleFlag(m_pcReshaper->getSliceReshaperInfo().getSliceReshapeChromaAdj() == 1); + if (m_pcReshaper->getSliceReshaperInfo().getSliceReshapeModelPresentFlag()) + { + int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 
0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ); + picHeader->setLmcsAPSId(apsId); + APS* lmcsAPS = picHeader->getLmcsAPS(); + if (lmcsAPS == nullptr) + { + ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap(); + lmcsAPS = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS); + if (lmcsAPS == NULL) + { + lmcsAPS = apsMap->allocatePS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS); + lmcsAPS->setAPSId(apsId); + lmcsAPS->setAPSType(LMCS_APS); + } + picHeader->setLmcsAPS(lmcsAPS); + } + //m_pcReshaper->copySliceReshaperInfo(lmcsAPS->getReshaperAPSInfo(), m_pcReshaper->getSliceReshaperInfo()); + SliceReshapeInfo& tInfo = lmcsAPS->getReshaperAPSInfo(); + SliceReshapeInfo& sInfo = m_pcReshaper->getSliceReshaperInfo(); + tInfo.reshaperModelMaxBinIdx = sInfo.reshaperModelMaxBinIdx; + tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx; + memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS)); + tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW; + tInfo.chrResScalingOffset = sInfo.chrResScalingOffset; + m_pcEncLib->getApsMap()->setChangedFlag((lmcsAPS->getAPSId() << NUM_APS_TYPE_LEN) + LMCS_APS); + } + + + if (picHeader->getLmcsEnabledFlag()) + { + int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 
0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ); + picHeader->setLmcsAPSId(apsId); + } + } + else + { + m_pcReshaper->setCTUFlag(false); + } +} + // ==================================================================================================================== // Public member functions // ==================================================================================================================== @@ -1344,19 +1925,20 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut, bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE , bool isEncodeLtRef + , const int picIdInGOP ) { // TODO: Split this function up. Picture* pcPic = NULL; + PicHeader* picHeader = NULL; Slice* pcSlice; OutputBitstream *pcBitstreamRedirect; pcBitstreamRedirect = new OutputBitstream; AccessUnit::iterator itLocationToPushSliceHeaderNALU; // used to store location where NALU containing slice header is to be inserted + Picture* scaledRefPic[MAX_NUM_REF] = {}; - xInitGOP(iPOCLast, iNumPicRcvd, isField - , isEncodeLtRef - ); + xInitGOP( iPOCLast, iNumPicRcvd, isField, isEncodeLtRef ); m_iNumPicCoded = 0; SEIMessages leadingSeiMessages; @@ -1364,7 +1946,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, SEIMessages duInfoSeiMessages; SEIMessages trailingSeiMessages; std::deque<DUData> duData; - SEIDecodingUnitInfo decodingUnitInfoSEI; EfficientFieldIRAPMapping effFieldIRAPMap; if (m_pcCfg->getEfficientFieldIRAPEnabled()) @@ -1372,14 +1953,17 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, effFieldIRAPMap.initialize(isField, m_iGopSize, iPOCLast, iNumPicRcvd, m_iLastIDR, this, m_pcCfg); } - // reset flag indicating whether pictures have been encoded - for ( int iGOPid=0; iGOPid < m_iGopSize; iGOPid++ ) + if( isField && picIdInGOP == 0 ) { - m_pcCfg->setEncodedFlag(iGOPid, false); + for( 
int iGOPid = 0; iGOPid < max(2, m_iGopSize); iGOPid++ ) + { + m_pcCfg->setEncodedFlag( iGOPid, false ); + } } - - for ( int iGOPid=0; iGOPid < m_iGopSize; iGOPid++ ) + for( int iGOPid = picIdInGOP; iGOPid <= picIdInGOP; iGOPid++ ) { + // reset flag indicating whether pictures have been encoded + m_pcCfg->setEncodedFlag( iGOPid, false ); if (m_pcCfg->getEfficientFieldIRAPEnabled()) { iGOPid=effFieldIRAPMap.adjustGOPid(iGOPid); @@ -1434,9 +2018,15 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, // start a new access unit: create an entry in the list of output access units AccessUnit accessUnit; + accessUnit.temporalId = m_pcCfg->getGOPEntry( iGOPid ).m_temporalId; xGetBuffer( rcListPic, rcListPicYuvRecOut, iNumPicRcvd, iTimeOffset, pcPic, pocCurr, isField ); + picHeader = pcPic->cs->picHeader; + picHeader->setSPSId( pcPic->cs->pps->getSPSId() ); + picHeader->setPPSId( pcPic->cs->pps->getPPSId() ); + picHeader->setSplitConsOverrideFlag(false); +#if ER_CHROMA_QP_WCG_PPS // th this is a hot fix for the choma qp control if( m_pcEncLib->getWCGChromaQPControl().isEnabled() && m_pcEncLib->getSwitchPOC() != -1 ) { @@ -1449,16 +2039,23 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, // replace the pps with a more appropriated one pcPic->cs->pps = pPPS; } +#endif + + // create objects based on the picture size + const int picWidth = pcPic->cs->pps->getPicWidthInLumaSamples(); + const int picHeight = pcPic->cs->pps->getPicHeightInLumaSamples(); + const int maxCUWidth = pcPic->cs->sps->getMaxCUWidth(); + const int maxCUHeight = pcPic->cs->sps->getMaxCUHeight(); + const ChromaFormat chromaFormatIDC = pcPic->cs->sps->getChromaFormatIdc(); + const int maxTotalCUDepth = pcPic->cs->sps->getMaxCodingDepth(); + + m_pcSliceEncoder->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth ); -#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM - pcPic->scheduler.init( 
pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, m_pcCfg->getNumWppThreads(), m_pcCfg->getNumWppExtraLines(), m_pcCfg->getNumSplitThreads() ); -#elif ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, 1 , 0 , m_pcCfg->getNumSplitThreads() ); -#elif ENABLE_WPP_PARALLELISM - pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, m_pcCfg->getNumWppThreads(), m_pcCfg->getNumWppExtraLines(), 1 ); #endif pcPic->createTempBuffers( pcPic->cs->pps->pcv->maxCUWidth ); - pcPic->cs->createCoeffs(); + pcPic->cs->createCoeffs((bool)pcPic->cs->sps->getPLTMode()); // Slice data initialization pcPic->clearSliceBuffer(); @@ -1477,15 +2074,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcPic->fieldPic = isField; #endif - int pocBits = pcSlice->getSPS()->getBitsForPOC(); - int pocMask = (1 << pocBits) - 1; - pcSlice->setLastIDR(m_iLastIDR & ~pocMask); -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentIdx(0); -#endif + pcSlice->setLastIDR(m_iLastIDR); pcSlice->setIndependentSliceIdx(0); - //set default slice level flag to the same as SPS level flag - pcSlice->setLFCrossSliceBoundaryFlag( pcSlice->getPPS()->getLoopFilterAcrossSlicesEnabledFlag() ); if(pcSlice->getSliceType()==B_SLICE&&m_pcCfg->getGOPEntry(iGOPid).m_sliceType=='P') { @@ -1497,40 +2087,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } // Set the nal unit type pcSlice->setNalUnitType(getNalUnitType(pocCurr, m_iLastIDR, isField)); -#if !JVET_M0101_HLS - if(pcSlice->getTemporalLayerNonReferenceFlag()) - { - if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_TRAIL_R && - !(m_iGopSize == 1 && pcSlice->getSliceType() == I_SLICE)) - // Add this condition to avoid POC issues with encoder_intra_main.cfg configuration (see #1127 in bug tracker) - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TRAIL_N); - } - 
if(pcSlice->getNalUnitType()==NAL_UNIT_CODED_SLICE_RADL_R) - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_RADL_N); - } - if(pcSlice->getNalUnitType()==NAL_UNIT_CODED_SLICE_RASL_R) - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_RASL_N); - } - } -#endif if (m_pcCfg->getEfficientFieldIRAPEnabled()) { -#if !JVET_M0101_HLS - if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) // IRAP picture -#else if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) // IRAP picture -#endif { m_associatedIRAPType = pcSlice->getNalUnitType(); m_associatedIRAPPOC = pocCurr; @@ -1558,26 +2120,17 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if (m_pcCfg->getUseCompositeRef() && m_picBg->getSpliceFull() && getUseLTRef()) { - m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, m_bgPOC); + m_pcEncLib->selectReferencePictureList(pcSlice, pocCurr, iGOPid, m_bgPOC); } else { - m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, -1); + m_pcEncLib->selectReferencePictureList(pcSlice, pocCurr, iGOPid, -1); } if (!m_pcCfg->getEfficientFieldIRAPEnabled()) { -#if !JVET_M0101_HLS - if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) // IRAP picture -#else 
if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) // IRAP picture -#endif { m_associatedIRAPType = pcSlice->getNalUnitType(); m_associatedIRAPPOC = pocCurr; @@ -1586,185 +2139,139 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcSlice->setAssociatedIRAPPOC(m_associatedIRAPPOC); } - if ((pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPS(), false, m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3) != 0) || (pcSlice->isIRAP()) -#if !JVET_M0101_HLS - || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC()+1) -#else - || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC() + 1) -#endif + pcSlice->setEnableDRAPSEI(m_pcEncLib->getDependentRAPIndicationSEIEnabled()); + if (m_pcEncLib->getDependentRAPIndicationSEIEnabled()) + { + // Only mark the picture as DRAP if all of the following applies: + // 1) DRAP indication SEI messages are enabled + // 2) The current picture is not an intra picture + // 3) The current picture is in the DRAP period + // 4) The current picture is a trailing picture + pcSlice->setDRAP(m_pcEncLib->getDependentRAPIndicationSEIEnabled() && m_pcEncLib->getDrapPeriod() > 0 && !pcSlice->isIntra() && + pocCurr % m_pcEncLib->getDrapPeriod() == 0 && pocCurr > pcSlice->getAssociatedIRAPPOC()); + + if (pcSlice->isDRAP()) + { + int pocCycle = 1 << (pcSlice->getSPS()->getBitsForPOC()); + int deltaPOC = pocCurr > pcSlice->getAssociatedIRAPPOC() ? 
pocCurr - pcSlice->getAssociatedIRAPPOC() : pocCurr - ( pcSlice->getAssociatedIRAPPOC() & (pocCycle -1) ); + CHECK(deltaPOC > (pocCycle >> 1), "Use a greater value for POC wraparound to enable a POC distance between IRAP and DRAP of " << deltaPOC << "."); + m_latestDRAPPOC = pocCurr; + pcSlice->setTLayer(0); // Force DRAP picture to have temporal layer 0 + } + pcSlice->setLatestDRAPPOC(m_latestDRAPPOC); + pcSlice->setUseLTforDRAP(false); // When set, sets the associated IRAP as long-term in RPL0 at slice level, unless the associated IRAP is already included in RPL0 or RPL1 defined in SPS + + PicList::iterator iterPic = rcListPic.begin(); + Picture *rpcPic; + while (iterPic != rcListPic.end()) + { + rpcPic = *(iterPic++); + if ( pcSlice->isDRAP() && rpcPic->getPOC() != pocCurr ) + { + rpcPic->precedingDRAP = true; + } + else if ( !pcSlice->isDRAP() && rpcPic->getPOC() == pocCurr ) + { + rpcPic->precedingDRAP = false; + } + } + } + + if (pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL0(), 0, false) != 0 || pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL1(), 1, false) != 0 || + (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && !pcSlice->isIRAP() && ( pcSlice->isDRAP() || !pcSlice->isPOCInRefPicList(pcSlice->getRPL0(), pcSlice->getAssociatedIRAPPOC())) ) + || ( !pcSlice->isIRAP() && pcSlice->getPic()->cs->vps && m_pcEncLib->getNumRefLayers( pcSlice->getPic()->cs->vps->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ) ) ) { - pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled() - , isEncodeLtRef, m_pcCfg->getUseCompositeRef() - ); + xCreateExplicitReferencePictureSetFromReference( pcSlice, rcListPic, pcSlice->getRPL0(), pcSlice->getRPL1() ); } - pcSlice->applyReferencePictureSet(rcListPic, pcSlice->getRPS()); + pcSlice->applyReferencePictureListBasedMarking( rcListPic, 
pcSlice->getRPL0(), pcSlice->getRPL1(), pcSlice->getPic()->layerId ); if(pcSlice->getTLayer() > 0 -#if !JVET_M0101_HLS - && !( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N // Check if not a leading picture - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_R - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N - || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R ) -#else && !(pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL // Check if not a leading picture || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL) -#endif ) { -#if !JVET_M0101_HLS - if(pcSlice->isTemporalLayerSwitchingPoint(rcListPic) || pcSlice->getSPS()->getTemporalIdNestingFlag()) - { - if(pcSlice->getTemporalLayerNonReferenceFlag()) - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TSA_N); - } - else - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TSA_R); - } - } - else if(pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic)) -#else if (pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic)) -#endif { bool isSTSA=true; + + if( !m_pcEncLib->getVPS()->getAllIndependentLayersFlag() && m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ) + { + isSTSA = false; + } + for(int ii=iGOPid+1;(ii<m_pcCfg->getGOPSize() && isSTSA==true);ii++) { - int lTid= m_pcCfg->getGOPEntry(ii).m_temporalId; - if(lTid==pcSlice->getTLayer()) + int lTid = m_pcCfg->getRPLEntry(0, ii).m_temporalId; + + if (lTid == pcSlice->getTLayer()) { - const ReferencePictureSet* nRPS = pcSlice->getSPS()->getRPSList()->getReferencePictureSet(ii); - for(int jj=0;jj<nRPS->getNumberOfPictures();jj++) + const ReferencePictureList* rpl0 = pcSlice->getSPS()->getRPLList0()->getReferencePictureList(ii); + for (int jj = 0; jj < pcSlice->getRPL0()->getNumberOfActivePictures(); jj++) { - if(nRPS->getUsed(jj)) + int tPoc = m_pcCfg->getRPLEntry(0, ii).m_POC + rpl0->getRefPicIdentifier(jj); + int kk = 0; + for (kk = 0; kk<m_pcCfg->getGOPSize(); kk++) { - int 
tPoc=m_pcCfg->getGOPEntry(ii).m_POC+nRPS->getDeltaPOC(jj); - int kk=0; - for(kk=0;kk<m_pcCfg->getGOPSize();kk++) + if (m_pcCfg->getRPLEntry(0, kk).m_POC == tPoc) { - if(m_pcCfg->getGOPEntry(kk).m_POC==tPoc) - { - break; - } + break; } - int tTid=m_pcCfg->getGOPEntry(kk).m_temporalId; - if(tTid >= pcSlice->getTLayer()) + } + int tTid = m_pcCfg->getRPLEntry(0, kk).m_temporalId; + if (tTid >= pcSlice->getTLayer()) + { + isSTSA = false; + break; + } + } + const ReferencePictureList* rpl1 = pcSlice->getSPS()->getRPLList1()->getReferencePictureList(ii); + for (int jj = 0; jj < pcSlice->getRPL1()->getNumberOfActivePictures(); jj++) + { + int tPoc = m_pcCfg->getRPLEntry(1, ii).m_POC + rpl1->getRefPicIdentifier(jj); + int kk = 0; + for (kk = 0; kk<m_pcCfg->getGOPSize(); kk++) + { + if (m_pcCfg->getRPLEntry(1, kk).m_POC == tPoc) { - isSTSA=false; break; } } + int tTid = m_pcCfg->getRPLEntry(1, kk).m_temporalId; + if (tTid >= pcSlice->getTLayer()) + { + isSTSA = false; + break; + } } } } if(isSTSA==true) { -#if !JVET_M0101_HLS - if(pcSlice->getTemporalLayerNonReferenceFlag()) - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA_N); - } - else - { - pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA_R); - } -#else pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA); -#endif } } } - if (pcSlice->getRPSidx() == -1) - arrangeLongtermPicturesInRPS(pcSlice, rcListPic); - RefPicListModification* refPicListModification = pcSlice->getRefPicListModification(); - refPicListModification->setRefPicListModificationFlagL0(0); - refPicListModification->setRefPicListModificationFlagL1(0); if (m_pcCfg->getUseCompositeRef() && getUseLTRef() && (pocCurr > getLastLTRefPoc())) { - pcSlice->setNumRefIdx(REF_PIC_LIST_0, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures())); - pcSlice->setNumRefIdx(REF_PIC_LIST_1, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures())); + pcSlice->setNumRefIdx(REF_PIC_LIST_0, 
(pcSlice->isIntra()) ? 0 : min(m_pcCfg->getRPLEntry(0, iGOPid).m_numRefPicsActive + 1, pcSlice->getRPL0()->getNumberOfActivePictures())); + pcSlice->setNumRefIdx(REF_PIC_LIST_1, (!pcSlice->isInterB()) ? 0 : min(m_pcCfg->getRPLEntry(1, iGOPid).m_numRefPicsActive + 1, pcSlice->getRPL1()->getNumberOfActivePictures())); } else { - pcSlice->setNumRefIdx(REF_PIC_LIST_0, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures())); - pcSlice->setNumRefIdx(REF_PIC_LIST_1, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures())); + pcSlice->setNumRefIdx(REF_PIC_LIST_0, (pcSlice->isIntra()) ? 0 : pcSlice->getRPL0()->getNumberOfActivePictures()); + pcSlice->setNumRefIdx(REF_PIC_LIST_1, (!pcSlice->isInterB()) ? 0 : pcSlice->getRPL1()->getNumberOfActivePictures()); } if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef()) { arrangeCompositeReference(pcSlice, rcListPic, pocCurr); } // Set reference list - pcSlice->setRefPicList ( rcListPic ); - - if (m_pcCfg->getUseHashME()) - { - PicList::iterator iterPic = rcListPic.begin(); - while (iterPic != rcListPic.end()) - { - Picture* refPic = *(iterPic++); - - if (refPic->poc != pcPic->poc && refPic->referenced) - { - if (!refPic->getHashMap()->isInitial()) - { - if (refPic->getPOC() == 0) - { - Pel* picSrc = refPic->getOrigBuf().get(COMPONENT_Y).buf; - int stridePic = refPic->getOrigBuf().get(COMPONENT_Y).stride; - int picWidth = pcSlice->getSPS()->getPicWidthInLumaSamples(); - int picHeight = pcSlice->getSPS()->getPicHeightInLumaSamples(); - int blockSize = 4; - int allNum = 0; - int simpleNum = 0; - for (int j = 0; j <= picHeight - blockSize; j += blockSize) - { - for (int i = 0; i <= picWidth - blockSize; i += blockSize) - { - Pel* curBlock = picSrc + j * stridePic + i; - bool isHorSame = true; - for (int m = 0; m < blockSize&&isHorSame; m++) - { - for (int n = 1; n < blockSize&&isHorSame; n++) - { - if (curBlock[m*stridePic] != 
curBlock[m*stridePic + n]) - { - isHorSame = false; - } - } - } - bool isVerSame = true; - for (int m = 1; m < blockSize&&isVerSame; m++) - { - for (int n = 0; n < blockSize&&isVerSame; n++) - { - if (curBlock[n] != curBlock[m*stridePic + n]) - { - isVerSame = false; - } - } - } - allNum++; - if (isHorSame || isVerSame) - { - simpleNum++; - } - } - } + pcSlice->constructRefPicList(rcListPic); + + xPicInitHashME( pcPic, pcSlice->getPPS(), rcListPic ); - if (simpleNum < 0.3*allNum) - { - m_pcCfg->setUseHashME(false); - break; - } - } - refPic->addPictureToHashMapForInter(); - } - } - } - } if( m_pcCfg->getUseAMaxBT() ) { if( !pcSlice->isIRAP() ) @@ -1781,19 +2288,19 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if( refLayer >= 0 && m_uiNumBlk[refLayer] != 0 ) { - pcSlice->setSplitConsOverrideFlag(true); + picHeader->setSplitConsOverrideFlag(true); double dBlkSize = sqrt( ( double ) m_uiBlkSize[refLayer] / m_uiNumBlk[refLayer] ); - if( dBlkSize < AMAXBT_TH32 ) + if( dBlkSize < AMAXBT_TH32 || pcPic->cs->sps->getCTUSize()==32 ) { - pcSlice->setMaxBTSize( 32 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 32 ); + picHeader->setMaxBTSize( 1, 32 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 32 ); } - else if( dBlkSize < AMAXBT_TH64 ) + else if( dBlkSize < AMAXBT_TH64 || pcPic->cs->sps->getCTUSize()==64 ) { - pcSlice->setMaxBTSize( 64 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 64 ); + picHeader->setMaxBTSize( 1, 64 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 64 ); } else { - pcSlice->setMaxBTSize( 128 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 128 ); + picHeader->setMaxBTSize( 1, 128 > MAX_BT_SIZE_INTER ? 
MAX_BT_SIZE_INTER : 128 ); } m_uiBlkSize[refLayer] = 0; @@ -1832,12 +2339,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if (pcSlice->getSliceType() == B_SLICE) { -#if X0038_LAMBDA_FROM_QP_CAPABILITY - const uint32_t uiColFromL0 = calculateCollocatedFromL0Flag(pcSlice); - pcSlice->setColFromL0Flag(uiColFromL0); -#else - pcSlice->setColFromL0Flag(1-uiColDir); -#endif + bool bLowDelay = true; int iCurrPOC = pcSlice->getPOC(); int iRefIdx = 0; @@ -1864,9 +2366,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcSlice->setCheckLDC(true); } -#if !X0038_LAMBDA_FROM_QP_CAPABILITY - uiColDir = 1-uiColDir; -#endif //------------------------------------------------------------- pcSlice->setRefPOCList(); @@ -1878,28 +2377,107 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { if (iGOPid == 0) // first picture in SOP (i.e. forward B) { - pcSlice->setEnableTMVPFlag(0); + picHeader->setEnableTMVPFlag(0); } else { // Note: pcSlice->getColFromL0Flag() is assumed to be always 0 and getcolRefIdx() is always 0. 
- pcSlice->setEnableTMVPFlag(1); + picHeader->setEnableTMVPFlag(1); } } else if (m_pcEncLib->getTMVPModeId() == 1) { - pcSlice->setEnableTMVPFlag(1); + picHeader->setEnableTMVPFlag(1); } else { - pcSlice->setEnableTMVPFlag(0); + picHeader->setEnableTMVPFlag(0); + } + + // disable TMVP when current picture is the only ref picture + if (pcSlice->isIRAP() && pcSlice->getSPS()->getIBCFlag()) + { + picHeader->setEnableTMVPFlag(0); + } + + if( pcSlice->getSliceType() != I_SLICE && picHeader->getEnableTMVPFlag() ) + { + int colRefIdxL0 = -1, colRefIdxL1 = -1; + + for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); refIdx++ ) + { +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->isRefScaled( pcSlice->getPPS() ) == false ) +#else + int refPicWidth = pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->unscaledPic->cs->pps->getPicWidthInLumaSamples(); + int refPicHeight = pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->unscaledPic->cs->pps->getPicHeightInLumaSamples(); + int curPicWidth = pcSlice->getPPS()->getPicWidthInLumaSamples(); + int curPicHeight = pcSlice->getPPS()->getPicHeightInLumaSamples(); + + if( refPicWidth == curPicWidth && refPicHeight == curPicHeight ) +#endif + { + colRefIdxL0 = refIdx; + break; + } + } + + if( pcSlice->getSliceType() == B_SLICE ) + { + for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); refIdx++ ) + { +#if JVET_Q0487_SCALING_WINDOW_ISSUES + if( pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->isRefScaled( pcSlice->getPPS() ) == false ) +#else + int refPicWidth = pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->unscaledPic->cs->pps->getPicWidthInLumaSamples(); + int refPicHeight = pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->unscaledPic->cs->pps->getPicHeightInLumaSamples(); + int curPicWidth = pcSlice->getPPS()->getPicWidthInLumaSamples(); + int curPicHeight = pcSlice->getPPS()->getPicHeightInLumaSamples(); + + if( refPicWidth == curPicWidth && refPicHeight == curPicHeight ) 
+#endif + { + colRefIdxL1 = refIdx; + break; + } + } + } + + if( colRefIdxL0 >= 0 && colRefIdxL1 >= 0 ) + { + const Picture *refPicL0 = pcSlice->getRefPic( REF_PIC_LIST_0, colRefIdxL0 ); + if( !refPicL0->slices.size() ) + { + refPicL0 = refPicL0->unscaledPic; + } + + const Picture *refPicL1 = pcSlice->getRefPic( REF_PIC_LIST_1, colRefIdxL1 ); + if( !refPicL1->slices.size() ) + { + refPicL1 = refPicL1->unscaledPic; + } + + const uint32_t uiColFromL0 = refPicL0->slices[0]->getSliceQp() > refPicL1->slices[0]->getSliceQp(); + pcSlice->setColFromL0Flag( uiColFromL0 ); + pcSlice->setColRefIdx( uiColFromL0 ? colRefIdxL0 : colRefIdxL1 ); + } + else if( colRefIdxL0 < 0 && colRefIdxL1 >= 0 ) + { + pcSlice->setColFromL0Flag( false ); + pcSlice->setColRefIdx( colRefIdxL1 ); + } + else if( colRefIdxL0 >= 0 && colRefIdxL1 < 0 ) + { + pcSlice->setColFromL0Flag( true ); + pcSlice->setColRefIdx( colRefIdxL0 ); + } + else + { + picHeader->setEnableTMVPFlag( 0 ); + } } - // disable TMVP when current picture is the only ref picture - if (pcSlice->isIRAP() && pcSlice->getSPS()->getIBCFlag()) - { - pcSlice->setEnableTMVPFlag(0); - } + pcSlice->scaleRefPicList( scaledRefPic, pcPic->cs->picHeader, m_pcEncLib->getApss(), picHeader->getLmcsAPS(), picHeader->getScalingListAPS(), false ); // set adaptive search range for non-intra-slices if (m_pcCfg->getUseASR() && !pcSlice->isIRAP()) @@ -1926,17 +2504,16 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } if(bGPBcheck) { - pcSlice->setMvdL1ZeroFlag(true); + picHeader->setMvdL1ZeroFlag(true); } else { - pcSlice->setMvdL1ZeroFlag(false); + picHeader->setMvdL1ZeroFlag(false); } -#if HEVC_DEPENDENT_SLICES - pcPic->slices[pcSlice->getSliceSegmentIdx()]->setMvdL1ZeroFlag(pcSlice->getMvdL1ZeroFlag()); -#endif - if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false ) + if ( pcSlice->getSPS()->getUseSMVD() && pcSlice->getCheckLDC() == false + && picHeader->getMvdL1ZeroFlag() == false + ) { int 
currPOC = pcSlice->getPOC(); @@ -1948,7 +2525,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); - if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm; + if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) && !isRefLongTerm ) { forwardPOC = poc; refIdx0 = ref; @@ -1959,7 +2537,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); - if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm; + if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) && !isRefLongTerm ) { backwardPOC = poc; refIdx1 = ref; @@ -1977,7 +2556,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC(); - if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm; + if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) && !isRefLongTerm ) { backwardPOC = poc; refIdx0 = ref; @@ -1988,7 +2568,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ ) { int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC(); - if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) ) + const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm; + if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) && !isRefLongTerm ) { forwardPOC = poc; refIdx1 = ref; 
@@ -2015,119 +2596,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, int actualTotalBits = 0; int estimatedBits = 0; int tmpBitsBeforeWriting = 0; - if ( m_pcCfg->getUseRateCtrl() ) // TODO: does this work with multiple slices and slice-segments? - { - int frameLevel = m_pcRateCtrl->getRCSeq()->getGOPID2Level( iGOPid ); - if ( pcPic->slices[0]->isIRAP() ) - { - frameLevel = 0; - } - m_pcRateCtrl->initRCPic( frameLevel ); - estimatedBits = m_pcRateCtrl->getRCPic()->getTargetBits(); - -#if U0132_TARGET_BITS_SATURATION - if (m_pcRateCtrl->getCpbSaturationEnabled() && frameLevel != 0) - { - int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate(); - - // prevent overflow - if (estimatedCpbFullness - estimatedBits > (int)(m_pcRateCtrl->getCpbSize()*0.9f)) - { - estimatedBits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f); - } - - estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate(); - // prevent underflow -#if V0078_ADAPTIVE_LOWER_BOUND - if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound()) - { - estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound()); - } -#else - if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f)) - { - estimatedBits = std::max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f)); - } -#endif - - m_pcRateCtrl->getRCPic()->setTargetBits(estimatedBits); - } -#endif - - int sliceQP = m_pcCfg->getInitialQP(); - if ( ( pcSlice->getPOC() == 0 && m_pcCfg->getInitialQP() > 0 ) || ( frameLevel == 0 && m_pcCfg->getForceIntraQP() ) ) // QP is specified - { - int NumberBFrames = ( m_pcCfg->getGOPSize() - 1 ); - double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)NumberBFrames ); - double dQPFactor = 0.57*dLambda_scale; - int SHIFT_QP = 12; - int bitdepth_luma_qp_scale = - 6 - * (pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 - - 
DISTORTION_PRECISION_ADJUSTMENT(pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); - double qp_temp = (double) sliceQP + bitdepth_luma_qp_scale - SHIFT_QP; - lambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); - } - else if ( frameLevel == 0 ) // intra case, but use the model - { - m_pcSliceEncoder->calCostSliceI(pcPic); // TODO: This only analyses the first slice segment - what about the others? - - if ( m_pcCfg->getIntraPeriod() != 1 ) // do not refine allocated bits for all intra case - { - int bits = m_pcRateCtrl->getRCSeq()->getLeftAverageBits(); - bits = m_pcRateCtrl->getRCPic()->getRefineBitsForIntra( bits ); - -#if U0132_TARGET_BITS_SATURATION - if (m_pcRateCtrl->getCpbSaturationEnabled() ) - { - int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate(); - - // prevent overflow - if (estimatedCpbFullness - bits > (int)(m_pcRateCtrl->getCpbSize()*0.9f)) - { - bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f); - } - - estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate(); - // prevent underflow -#if V0078_ADAPTIVE_LOWER_BOUND - if (estimatedCpbFullness - bits < m_pcRateCtrl->getRCPic()->getLowerBound()) - { - bits = estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound(); - } -#else - if (estimatedCpbFullness - bits < (int)(m_pcRateCtrl->getCpbSize()*0.1f)) - { - bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f); - } -#endif - } -#endif - - if ( bits < 200 ) - { - bits = 200; - } - m_pcRateCtrl->getRCPic()->setTargetBits( bits ); - } - - list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList(); - m_pcRateCtrl->getRCPic()->getLCUInitTargetBits(); - lambda = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, pcSlice->isIRAP()); - sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture ); - } - else // normal case - { - list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList(); - lambda = m_pcRateCtrl->getRCPic()->estimatePicLambda( 
listPreviousPicture, pcSlice->isIRAP()); - sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture ); - } - sliceQP = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, sliceQP ); - m_pcRateCtrl->getRCPic()->setPicEstQP( sliceQP ); - - m_pcSliceEncoder->resetQP( pcPic, sliceQP, lambda ); - } + xPicInitRateControl(estimatedBits, iGOPid, lambda, pcPic, pcSlice); uint32_t uiNumSliceSegments = 1; @@ -2137,27 +2607,19 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, // Allocate some coders, now the number of tiles are known. const uint32_t numberOfCtusInFrame = pcPic->cs->pcv->sizeInCtus; -#if HEVC_TILES_WPP - const int numSubstreamsColumns = (pcSlice->getPPS()->getNumTileColumnsMinus1() + 1); - const int numSubstreamRows = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag() ? pcPic->cs->pcv->heightInCtus : (pcSlice->getPPS()->getNumTileRowsMinus1() + 1); - const int numSubstreams = numSubstreamRows * numSubstreamsColumns; -#else - const int numSubstreams = 1; -#endif + const int numSubstreamsColumns = pcSlice->getPPS()->getNumTileColumns(); + const int numSubstreamRows = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag() ? 
pcPic->cs->pcv->heightInCtus : (pcSlice->getPPS()->getNumTileRows()); + const int numSubstreams = std::max<int> (numSubstreamRows * numSubstreamsColumns, (int) pcPic->cs->pps->getNumSlicesInPic()); std::vector<OutputBitstream> substreamsOut(numSubstreams); #if ENABLE_QPA pcPic->m_uEnerHpCtu.resize (numberOfCtusInFrame); pcPic->m_iOffsetCtu.resize (numberOfCtusInFrame); #if ENABLE_QPA_SUB_CTU - if (pcSlice->getPPS()->getUseDQP() && pcSlice->getPPS()->getCuQpDeltaSubdiv() > 0) + if (pcSlice->getPPS()->getUseDQP() && pcSlice->getCuQpDeltaSubdiv() > 0) { const PreCalcValues &pcv = *pcPic->cs->pcv; -#if MAX_TB_SIZE_SIGNALLING - const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min (pcPic->cs->sps->getMaxTbSize(), pcv.maxCUWidth)]; -#else - const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth)]; -#endif + const unsigned mtsLog2 = (unsigned)floorLog2(std::min (pcPic->cs->sps->getMaxTbSize(), pcv.maxCUWidth)); pcPic->m_subCtuQP.resize ((pcv.maxCUWidth >> mtsLog2) * (pcv.maxCUHeight >> mtsLog2)); } #endif @@ -2172,9 +2634,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if( pcSlice->getSPS()->getALFEnabledFlag() ) { pcPic->resizeAlfCtuEnableFlag( numberOfCtusInFrame ); - // reset the APS ALF parameters - AlfSliceParam newALFParam; - pcSlice->getAPS()->setAlfAPSParam(newALFParam); + pcPic->resizeAlfCtuAlternative( numberOfCtusInFrame ); + pcPic->resizeAlfCtbFilterIndex(numberOfCtusInFrame); } bool decPic = false; @@ -2192,147 +2653,53 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, // overwrite chroma qp offset for dual tree pcSlice->setSliceChromaQpDelta(COMPONENT_Cb, m_pcCfg->getChromaCbQpOffsetDualTree()); pcSlice->setSliceChromaQpDelta(COMPONENT_Cr, m_pcCfg->getChromaCrQpOffsetDualTree()); - m_pcSliceEncoder->setUpLambda(pcSlice, pcSlice->getLambdas()[0], pcSlice->getSliceQp()); - } - if (pcSlice->getSPS()->getUseReshaper()) - { - m_pcReshaper->getReshapeCW()->rspTid 
= pcSlice->getTLayer() + (pcSlice->isIntra() ? 0 : 1); - m_pcReshaper->getReshapeCW()->rspSliceQP = pcSlice->getSliceQp(); - - m_pcReshaper->setSrcReshaped(false); - m_pcReshaper->setRecReshaped(true); - - if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) - { - m_pcReshaper->preAnalyzerHDR(pcPic, pcSlice->getSliceType(), m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree()); - } - else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR) - { - m_pcReshaper->preAnalyzerSDR(pcPic, pcSlice->getSliceType(), m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree()); - } - else + if (pcSlice->getSPS()->getJointCbCrEnabledFlag()) { - THROW("Reshaper for other signal currently not defined!"); + pcSlice->setSliceChromaQpDelta(JOINT_CbCr, m_pcCfg->getChromaCbCrQpOffsetDualTree()); } + m_pcSliceEncoder->setUpLambda(pcSlice, pcSlice->getLambdas()[0], pcSlice->getSliceQp()); + } - if (pcSlice->getSliceType() == I_SLICE ) - { - if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) - { - m_pcReshaper->initLUTfromdQPModel(); - m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTableChromaMD(m_pcReshaper->getInvLUT()); - } - else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR) - { - if (m_pcReshaper->getReshapeFlag()) - { - m_pcReshaper->constructReshaperSDR(); - m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight()); - } - } - else - { - THROW("Reshaper for other signal currently not defined!"); - } - - m_pcReshaper->setCTUFlag(false); + xPicInitLMCS(pcPic, picHeader, pcSlice); - //reshape original signal - if (m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper()) - { - pcPic->getOrigBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getFwdLUT()); - m_pcReshaper->setSrcReshaped(true); - m_pcReshaper->setRecReshaped(true); - } - } - else - { - if (!m_pcReshaper->getReshapeFlag()) - { - m_pcReshaper->setCTUFlag(false); - } - else - m_pcReshaper->setCTUFlag(true); 
+ if( pcSlice->getSPS()->getScalingListFlag() && m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ ) + { + picHeader->setScalingListPresentFlag( true ); - m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(false); + int apsId = std::min<int>( 7, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ); + picHeader->setScalingListAPSId( apsId ); - if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ) - { - m_pcEncLib->getRdCost()->restoreReshapeLumaLevelToWeightTable(); - } - else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR) - { - int modIP = pcPic->getPOC() - pcPic->getPOC() / m_pcCfg->getReshapeCW().rspFpsToIp * m_pcCfg->getReshapeCW().rspFpsToIp; - if (m_pcReshaper->getReshapeFlag() && m_pcCfg->getReshapeCW().rspIntraPeriod == -1 && modIP == 0) // for LDB, update reshaping curve every second - { - m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(true); - m_pcReshaper->constructReshaperSDR(); - m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight()); - } - } - else - { - THROW("Reshaper for other signal currently not defined!"); - } - } + ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap(); + APS* scalingListAPS = apsMap->getPS( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS ); + assert( scalingListAPS != NULL ); + picHeader->setScalingListAPS( scalingListAPS ); + } - m_pcReshaper->copySliceReshaperInfo(pcSlice->getReshapeInfo(), m_pcReshaper->getSliceReshaperInfo()); + pcPic->cs->picHeader->setPic(pcPic); + pcPic->cs->picHeader->setValid(); + if(pcPic->cs->sps->getFpelMmvdEnabledFlag()) + { + // cannot set pic_fpel_mmvd_enabled_flag at slice level - need new picture-level version of checkDisFracMmvd algorithm? 
+ // m_pcSliceEncoder->checkDisFracMmvd( pcPic, 0, numberOfCtusInFrame ); + bool useIntegerMVD = (pcPic->lwidth()*pcPic->lheight() > 1920 * 1080); + pcPic->cs->picHeader->setDisFracMMVD( useIntegerMVD ); } - else + if (pcSlice->getSPS()->getJointCbCrEnabledFlag()) { - m_pcReshaper->setCTUFlag(false); + m_pcSliceEncoder->setJointCbCrModes(*pcPic->cs, Position(0, 0), pcPic->cs->area.lumaSize()); } - if( encPic ) // now compress (trial encode) the various slice segments (slices, and dependent slices) { DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", pocCurr ) ) ); - pcSlice->setSliceCurStartCtuTsAddr( 0 ); -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentCurStartCtuTsAddr( 0 ); -#endif - - for(uint32_t nextCtuTsAddr = 0; nextCtuTsAddr < numberOfCtusInFrame; ) + for(uint32_t sliceIdx = 0; sliceIdx < pcPic->cs->pps->getNumSlicesInPic(); sliceIdx++ ) { + pcSlice->setSliceMap( pcPic->cs->pps->getSliceMap( sliceIdx ) ); m_pcSliceEncoder->precompressSlice( pcPic ); m_pcSliceEncoder->compressSlice ( pcPic, false, false ); -#if HEVC_DEPENDENT_SLICES - const uint32_t curSliceSegmentEnd = pcSlice->getSliceSegmentCurEndCtuTsAddr(); - if (curSliceSegmentEnd < numberOfCtusInFrame) - { - const bool bNextSegmentIsDependentSlice = curSliceSegmentEnd < pcSlice->getSliceCurEndCtuTsAddr(); - const uint32_t sliceBits = pcSlice->getSliceBits(); - uint32_t independentSliceIdx = pcSlice->getIndependentSliceIdx(); - pcPic->allocateNewSlice(); - // prepare for next slice - m_pcSliceEncoder->setSliceSegmentIdx ( uiNumSliceSegments ); - pcSlice = pcPic->slices [ uiNumSliceSegments ]; - CHECK(!(pcSlice->getPPS()!=0), "Unspecified error"); - pcSlice->copySliceInfo ( pcPic->slices[uiNumSliceSegments-1] ); - pcSlice->setSliceSegmentIdx ( uiNumSliceSegments ); - if (bNextSegmentIsDependentSlice) - { - pcSlice->setSliceBits(sliceBits); - } - else - { - pcSlice->setSliceCurStartCtuTsAddr ( curSliceSegmentEnd ); - pcSlice->setSliceBits(0); - independentSliceIdx ++; - } - 
pcSlice->setIndependentSliceIdx( independentSliceIdx ); - pcSlice->setDependentSliceSegmentFlag( bNextSegmentIsDependentSlice ); - pcSlice->setSliceSegmentCurStartCtuTsAddr ( curSliceSegmentEnd ); - // TODO: optimise cabac_init during compress slice to improve multi-slice operation - // pcSlice->setEncCABACTableIdx(m_pcSliceEncoder->getEncCABACTableIdx()); - uiNumSliceSegments ++; - } - nextCtuTsAddr = curSliceSegmentEnd; -#else - const uint32_t curSliceEnd = pcSlice->getSliceCurEndCtuTsAddr(); - if(curSliceEnd < numberOfCtusInFrame) + if(sliceIdx < pcPic->cs->pps->getNumSlicesInPic() - 1) { uint32_t independentSliceIdx = pcSlice->getIndependentSliceIdx(); pcPic->allocateNewSlice(); @@ -2341,14 +2708,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcSlice = pcPic->slices[uiNumSliceSegments]; CHECK(!(pcSlice->getPPS() != 0), "Unspecified error"); pcSlice->copySliceInfo(pcPic->slices[uiNumSliceSegments - 1]); - pcSlice->setSliceCurStartCtuTsAddr(curSliceEnd); pcSlice->setSliceBits(0); independentSliceIdx++; pcSlice->setIndependentSliceIdx(independentSliceIdx); uiNumSliceSegments++; } - nextCtuTsAddr = curSliceEnd; -#endif } duData.clear(); @@ -2356,8 +2720,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, CodingStructure& cs = *pcPic->cs; pcSlice = pcPic->slices[0]; - if (pcSlice->getSPS()->getUseReshaper() && m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper()) + if (pcSlice->getSPS()->getUseLmcs() && m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper()) { + picHeader->setLmcsEnabledFlag(true); + + int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 
0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ); + + picHeader->setLmcsAPSId(apsId); CHECK((m_pcReshaper->getRecReshaped() == false), "Rec picture is not reshaped!"); pcPic->getRecoBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getInvLUT()); m_pcReshaper->setRecReshaped(false); @@ -2365,6 +2734,34 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcPic->getOrigBuf().copyFrom(pcPic->getTrueOrigBuf()); } + // create SAO object based on the picture size + if( pcSlice->getSPS()->getSAOEnabledFlag() ) + { + const uint32_t widthInCtus = ( picWidth + maxCUWidth - 1 ) / maxCUWidth; + const uint32_t heightInCtus = ( picHeight + maxCUHeight - 1 ) / maxCUHeight; + const uint32_t numCtuInFrame = widthInCtus * heightInCtus; + + const uint32_t log2SaoOffsetScaleLuma = pcPic->cs->slice->getPPS()->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_LUMA ); + const uint32_t log2SaoOffsetScaleChroma = pcPic->cs->slice->getPPS()->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_CHROMA ); + + m_pcSAO->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth, log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma ); + m_pcSAO->destroyEncData(); + m_pcSAO->createEncData( m_pcCfg->getSaoCtuBoundary(), numCtuInFrame ); + m_pcSAO->setReshaper( m_pcReshaper ); + } + + if( !m_pcEncLib->getLoopFilterDisable() ) + { + m_pcEncLib->getLoopFilter()->initEncPicYuvBuffer( chromaFormatIDC, picWidth, picHeight ); + } + + if( pcSlice->getSPS()->getScalingListFlag() && m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ ) + { + picHeader->setScalingListPresentFlag(true); + int apsId = 0; + picHeader->setScalingListAPSId( apsId ); + } + // SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas if( pcSlice->getSPS()->getSAOEnabledFlag() && m_pcCfg->getSaoCtuBoundary() ) { @@ -2402,11 +2799,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, 
#if ENABLE_QPA (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0), #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc() ); -#else - m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary() ); -#endif //assign SAO slice header for(int s=0; s< uiNumSliceSegments; s++) { @@ -2418,16 +2811,38 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, if( pcSlice->getSPS()->getALFEnabledFlag() ) { - AlfSliceParam alfSliceParam; - m_pcALF->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice ); + m_pcALF->destroy(); + m_pcALF->create( m_pcCfg, picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth, m_pcCfg->getBitDepth(), m_pcCfg->getInputBitDepth() ); - m_pcALF->ALFProcess( cs, pcSlice->getLambdas(), + for (int s = 0; s < uiNumSliceSegments; s++) + { + pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, false); + } + m_pcALF->initCABACEstimator(m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice, m_pcEncLib->getApsMap()); + m_pcALF->ALFProcess(cs, pcSlice->getLambdas() #if ENABLE_QPA - (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0), + , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? 
m_pcEncLib->getRdCost(PARL_PARAM0(0))->getChromaWeight() : 0.0) #endif - alfSliceParam ); + ); + //assign ALF slice header - pcPic->cs->aps->setAlfAPSParam(alfSliceParam); + for (int s = 0; s < uiNumSliceSegments; s++) + { + pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)); + pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb)); + pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr)); + if (pcPic->slices[s]->getTileGroupAlfEnabledFlag(COMPONENT_Y)) + { + pcPic->slices[s]->setTileGroupNumAps(cs.slice->getTileGroupNumAps()); + pcPic->slices[s]->setAlfAPSs(cs.slice->getTileGroupApsIdLuma()); + } + else + { + pcPic->slices[s]->setTileGroupNumAps(0); + } + pcPic->slices[s]->setAlfAPSs(cs.slice->getAlfAPSs()); + pcPic->slices[s]->setTileGroupApsIdChroma(cs.slice->getTileGroupApsIdChroma()); + } } if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef()) { @@ -2455,6 +2870,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } } + pcSlice->freeScaledRefPicList( scaledRefPic ); + if( m_pcCfg->getUseAMaxBT() ) { for( const CodingUnit *cu : pcPic->cs->cus ) @@ -2474,20 +2891,18 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, /////////////////////////////////////////////////////////////////////////////////////////////////// File writing // write various parameter sets -#if JCTVC_Y0038_PARAMS bool writePS = m_bSeqFirst || (m_pcCfg->getReWriteParamSets() && (pcSlice->isIRAP())); if (writePS) { m_pcEncLib->setParamSetChanged(pcSlice->getSPS()->getSPSId(), pcSlice->getPPS()->getPPSId()); } - actualTotalBits += xWriteParameterSets(accessUnit, pcSlice, writePS); - if (writePS) -#else - actualTotalBits += xWriteParameterSets( accessUnit, pcSlice, m_bSeqFirst ); + int layerIdx = m_pcEncLib->getVPS() == nullptr ? 
0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ); - if ( m_bSeqFirst ) -#endif + // it is assumed that layerIdx equal to 0 is always present + actualTotalBits += xWriteParameterSets( accessUnit, pcSlice, writePS && !layerIdx ); + + if (writePS) { // create prefix SEI messages at the beginning of the sequence CHECK(!(leadingSeiMessages.empty()), "Unspecified error"); @@ -2495,19 +2910,68 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_bSeqFirst = false; } - if (m_pcCfg->getAccessUnitDelimiter()) + + // it is assumed that layerIdx equal to 0 is always present + if( m_pcCfg->getAccessUnitDelimiter() && !layerIdx ) { xWriteAccessUnitDelimiter(accessUnit, pcSlice); } - if (pcSlice->getSPS()->getALFEnabledFlag() && pcSlice->getAPS()->getAlfAPSParam().enabledFlag[COMPONENT_Y]) + + //send LMCS APS when LMCSModel is updated. It can be updated even current slice does not enable reshaper. + //For example, in RA, update is on intra slice, but intra slice may not use reshaper + if (pcSlice->getSPS()->getUseLmcs()) { - pcSlice->setTileGroupAlfEnabledFlag(true); - pcSlice->setAPSId(pcSlice->getAPS()->getAPSId()); - actualTotalBits += xWriteAPS(accessUnit, pcSlice->getAPS()); + //only 1 LMCS data for 1 picture + int apsId = picHeader->getLmcsAPSId(); + ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap(); + APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS); + bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS); + if (writeAPS) + { + actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true ); + apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS); + CHECK(aps != picHeader->getLmcsAPS(), "Wrong LMCS APS pointer in compressGOP"); + } } - else + + // only 1 SCALING LIST data for 1 picture + if( pcSlice->getSPS()->getScalingListFlag() && ( m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ ) ) + { + int apsId = 
picHeader->getScalingListAPSId(); + ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap(); + APS* aps = apsMap->getPS( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS ); + bool writeAPS = aps && apsMap->getChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS ); + if( writeAPS ) + { + actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true ); + apsMap->clearChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS ); + CHECK( aps != picHeader->getScalingListAPS(), "Wrong SCALING LIST APS pointer in compressGOP" ); + } + } + + if (pcSlice->getSPS()->getALFEnabledFlag() && pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y)) { - pcSlice->setTileGroupAlfEnabledFlag(false); + for (int apsId = 0; apsId < ALF_CTB_MAX_NUM_APS; apsId++) + { + ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap(); + + APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + ALF_APS); + bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS); + if (!aps && pcSlice->getAlfAPSs() && pcSlice->getAlfAPSs()[apsId]) + { + writeAPS = true; + aps = pcSlice->getAlfAPSs()[apsId]; // use asp from slice header + *apsMap->allocatePS(apsId) = *aps; //allocate and cpy + m_pcALF->setApsIdStart( apsId ); + } + + if (writeAPS ) + { + actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true ); + apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS); + CHECK(aps != pcSlice->getAlfAPSs()[apsId], "Wrong APS pointer in compressGOP"); + } + } } // reset presence of BP SEI indication @@ -2519,11 +2983,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::size_t binCountsInNalUnits = 0; // For implementation of cabac_zero_word stuffing (section 7.4.3.10) std::size_t numBytesInVclNalUnits = 0; // For implementation of cabac_zero_word stuffing (section 7.4.3.10) -#if HEVC_DEPENDENT_SLICES - for( uint32_t sliceSegmentStartCtuTsAddr = 0, sliceSegmentIdxCount=0; sliceSegmentStartCtuTsAddr < 
numberOfCtusInFrame; sliceSegmentIdxCount++, sliceSegmentStartCtuTsAddr=pcSlice->getSliceSegmentCurEndCtuTsAddr() ) -#else - for(uint32_t sliceSegmentStartCtuTsAddr = 0, sliceSegmentIdxCount = 0; sliceSegmentStartCtuTsAddr < numberOfCtusInFrame; sliceSegmentIdxCount++, sliceSegmentStartCtuTsAddr = pcSlice->getSliceCurEndCtuTsAddr()) -#endif + for(uint32_t sliceSegmentIdxCount = 0; sliceSegmentIdxCount < pcPic->cs->pps->getNumSlicesInPic(); sliceSegmentIdxCount++ ) { pcSlice = pcPic->slices[sliceSegmentIdxCount]; if(sliceSegmentIdxCount > 0 && pcSlice->getSliceType()!= I_SLICE) @@ -2532,40 +2992,102 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } m_pcSliceEncoder->setSliceSegmentIdx(sliceSegmentIdxCount); - pcSlice->setRPS (pcPic->slices[0]->getRPS()); - pcSlice->setRPSidx(pcPic->slices[0]->getRPSidx()); - - for ( uint32_t ui = 0 ; ui < numSubstreams; ui++ ) - { - substreamsOut[ui].clear(); - } - - /* start slice NALunit */ - OutputNALUnit nalu( pcSlice->getNalUnitType(), pcSlice->getTLayer() ); - m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + pcSlice->setRPL0(pcPic->slices[0]->getRPL0()); + pcSlice->setRPL1(pcPic->slices[0]->getRPL1()); + pcSlice->setRPL0idx(pcPic->slices[0]->getRPL0idx()); + pcSlice->setRPL1idx(pcPic->slices[0]->getRPL1idx()); - pcSlice->setNoRaslOutputFlag(false); + pcSlice->setNoIncorrectPicOutputFlag(false); if (pcSlice->isIRAP()) { -#if !JVET_M0101_HLS - if (pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP) -#else if (pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && pcSlice->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP) -#endif { - pcSlice->setNoRaslOutputFlag(true); + pcSlice->setNoIncorrectPicOutputFlag(true); } //the inference for NoOutputPriorPicsFlag // KJS: This cannot happen at the encoder - if (!m_bFirst && pcSlice->isIRAP() && pcSlice->getNoRaslOutputFlag()) + if (!m_bFirst && (pcSlice->isIRAP() || 
pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_GDR) && pcSlice->getNoIncorrectPicOutputFlag()) { - if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) + if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_GDR) { - pcSlice->setNoOutputPriorPicsFlag(true); + picHeader->setNoOutputOfPriorPicsFlag(true); } } } + // code picture header before first slice + if(sliceSegmentIdxCount == 0) + { + // code RPL in picture header or slice headers + if( !m_pcCfg->getSliceLevelRpl() && (!pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent()) ) + { + picHeader->setPicRplPresentFlag(true); + picHeader->setRPL0idx(pcSlice->getRPL0idx()); + picHeader->setRPL1idx(pcSlice->getRPL1idx()); + picHeader->setRPL0(pcSlice->getRPL0()); + picHeader->setRPL1(pcSlice->getRPL1()); + *picHeader->getLocalRPL0() = *pcSlice->getLocalRPL0(); + *picHeader->getLocalRPL1() = *pcSlice->getLocalRPL1(); + } + else { + picHeader->setPicRplPresentFlag(false); + } + + // code DBLK in picture header or slice headers + if( !m_pcCfg->getSliceLevelDblk() ) + { + picHeader->setDeblockingFilterOverridePresentFlag( true ); + picHeader->setDeblockingFilterOverrideFlag ( pcSlice->getDeblockingFilterOverrideFlag() ); + picHeader->setDeblockingFilterDisable ( pcSlice->getDeblockingFilterDisable() ); + picHeader->setDeblockingFilterBetaOffsetDiv2 ( pcSlice->getDeblockingFilterBetaOffsetDiv2() ); + picHeader->setDeblockingFilterTcOffsetDiv2 ( pcSlice->getDeblockingFilterTcOffsetDiv2() ); + } + else { + picHeader->setDeblockingFilterOverridePresentFlag( false ); + } + + // code SAO parameters in picture header or slice headers + if( !m_pcCfg->getSliceLevelSao() ) + { + picHeader->setSaoEnabledPresentFlag( true ); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA )); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA)); + } + else { + 
picHeader->setSaoEnabledPresentFlag( false ); + } + + // code ALF parameters in picture header or slice headers + if( !m_pcCfg->getSliceLevelAlf() ) + { + picHeader->setAlfEnabledPresentFlag( true ); + picHeader->setAlfEnabledFlag(COMPONENT_Y, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y ) ); + picHeader->setAlfEnabledFlag(COMPONENT_Cb, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) ); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) ); + picHeader->setNumAlfAps(pcSlice->getTileGroupNumAps()); + picHeader->setAlfAPSs(pcSlice->getTileGroupApsIdLuma()); + picHeader->setAlfApsIdChroma(pcSlice->getTileGroupApsIdChroma()); + } + else { + picHeader->setAlfEnabledPresentFlag( false ); + } + + pcPic->cs->picHeader->setPic(pcPic); + pcPic->cs->picHeader->setValid(); + actualTotalBits += xWritePicHeader(accessUnit, pcPic->cs->picHeader); + } + pcSlice->setPicHeader( pcPic->cs->picHeader ); + + for ( uint32_t ui = 0 ; ui < numSubstreams; ui++ ) + { + substreamsOut[ui].clear(); + } + + /* start slice NALunit */ + OutputNALUnit nalu( pcSlice->getNalUnitType(), m_pcEncLib->getLayerId(), pcSlice->getTLayer() ); + m_HLSWriter->setBitstream( &nalu.m_Bitstream ); + + tmpBitsBeforeWriting = m_HLSWriter->getNumberOfWrittenBits(); m_HLSWriter->codeSliceHeader( pcSlice ); actualHeadBits += ( m_HLSWriter->getNumberOfWrittenBits() - tmpBitsBeforeWriting ); @@ -2583,27 +3105,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, // The final bitstream is either nalu.m_Bitstream or pcBitstreamRedirect; // Complete the slice header info. m_HLSWriter->setBitstream( &nalu.m_Bitstream ); -#if HEVC_TILES_WPP m_HLSWriter->codeTilesWPPEntryPoint( pcSlice ); -#endif // Append substreams... 
OutputBitstream *pcOut = pcBitstreamRedirect; -#if HEVC_TILES_WPP -#if HEVC_DEPENDENT_SLICES - - const int numZeroSubstreamsAtStartOfSlice = pcPic->tileMap->getSubstreamForCtuAddr(pcSlice->getSliceSegmentCurStartCtuTsAddr(), false, pcSlice); -#else - const int numZeroSubstreamsAtStartOfSlice = pcPic->tileMap->getSubstreamForCtuAddr(pcSlice->getSliceCurStartCtuTsAddr(), false, pcSlice); -#endif - const int numSubstreamsToCode = pcSlice->getNumberOfSubstreamSizes()+1; -#else - const int numZeroSubstreamsAtStartOfSlice = 0; const int numSubstreamsToCode = pcSlice->getNumberOfSubstreamSizes()+1; -#endif for ( uint32_t ui = 0 ; ui < numSubstreamsToCode; ui++ ) { - pcOut->addSubstream(&(substreamsOut[ui+numZeroSubstreamsAtStartOfSlice])); + pcOut->addSubstream(&(substreamsOut[ui])); } } @@ -2623,10 +3132,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) && - ( pcSlice->getSPS()->getVuiParametersPresentFlag() ) && - ( ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getNalHrdParametersPresentFlag() ) - || ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) && - ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getSubPicCpbParamsPresentFlag() ) ) + ( ( pcSlice->getSPS()->getHrdParameters()->getNalHrdParametersPresentFlag() ) + || ( pcSlice->getSPS()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) && + ( pcSlice->getSPS()->getHrdParameters()->getGeneralDecodingUnitHrdParamsPresentFlag() ) ) { uint32_t numNalus = 0; uint32_t numRBSPBytes = 0; @@ -2661,10 +3169,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcCfg->setEncodedFlag(iGOPid, true); double PSNR_Y; - xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y - , isEncodeLtRef - ); + xCalculateAddPSNRs(isField, isTff, 
iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y, isEncodeLtRef ); +#if HEVC_SEI // Only produce the Green Metadata SEI message with the last picture. if( m_pcCfg->getSEIGreenMetadataInfoSEIEnable() && pcSlice->getPOC() == ( m_pcCfg->getFramesToBeEncoded() - 1 ) ) { @@ -2672,6 +3179,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_seiEncoder.initSEIGreenMetadataInfo(seiGreenMetadataInfo, (uint32_t)(PSNR_Y * 100 + 0.5)); trailingSeiMessages.push_back(seiGreenMetadataInfo); } +#endif xWriteTrailingSEIMessages(trailingSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS()); @@ -2706,12 +3214,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } #endif } - + xCreateFrameFieldInfoSEI( leadingSeiMessages, pcSlice, isField ); xCreatePictureTimingSEI( m_pcCfg->getEfficientFieldIRAPEnabled() ? effFieldIRAPMap.GetIRAPGOPid() : 0, leadingSeiMessages, nestedSeiMessages, duInfoSeiMessages, pcSlice, isField, duData ); - if( m_pcCfg->getScalableNestingSEIEnabled() ) +#if HEVC_SEI + if( m_pcCfg->getScalableNestingSEIEnabled() ) { xCreateScalableNestingSEI( leadingSeiMessages, nestedSeiMessages ); } +#endif xWriteLeadingSEIMessages( leadingSeiMessages, duInfoSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS(), duData ); xWriteDuSEIMessages( duInfoSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS(), duData ); @@ -2728,7 +3238,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_bFirst = false; m_iNumPicCoded++; if (!(m_pcCfg->getUseCompositeRef() && isEncodeLtRef)) - m_totalCoded ++; + { + for( int i = pcSlice->getTLayer() ; i < pcSlice->getSPS()->getMaxTLayers() ; i ++ ) + { + m_totalCoded[i]++; + } + } /* logging: insert a newline at end of picture period */ if (m_pcCfg->getEfficientFieldIRAPEnabled()) @@ -2743,17 +3258,16 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, delete 
pcBitstreamRedirect; - CHECK(!( (m_iNumPicCoded == iNumPicRcvd) ), "Unspecified error"); - + CHECK( m_iNumPicCoded > 1, "Unspecified error" ); } -void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths) +void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths ) { #if ENABLE_QPA const bool useWPSNR = m_pcEncLib->getUseWPSNR(); #endif #if WCG_WPSNR - const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); + const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); #endif if( m_pcCfg->getDecodeBitstream(0).empty() && m_pcCfg->getDecodeBitstream(1).empty() && !m_pcCfg->useFastForwardToPOC() ) @@ -2779,25 +3293,36 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool //-- all msg( INFO, "\n" ); msg( DETAILS,"\nSUMMARY --------------------------------------------------------\n" ); +#if JVET_O0756_CALCULATE_HDRMETRICS + const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics(); +#endif #if ENABLE_QPA - m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR); + m_gcAnalyzeAll.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useWPSNR +#if JVET_O0756_CALCULATE_HDRMETRICS + , calculateHdrMetrics +#endif + ); #else - m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); + m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths +#if JVET_O0756_CALCULATE_HDRMETRICS + , 
calculateHdrMetrics #endif - msg( DETAILS,"\n\nI Slices--------------------------------------------------------\n" ); - m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); + ); +#endif + msg( DETAILS, "\n\nI Slices--------------------------------------------------------\n" ); + m_gcAnalyzeI.printOut( 'i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths ); - msg( DETAILS,"\n\nP Slices--------------------------------------------------------\n" ); - m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); + msg( DETAILS, "\n\nP Slices--------------------------------------------------------\n" ); + m_gcAnalyzeP.printOut( 'p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths ); - msg( DETAILS,"\n\nB Slices--------------------------------------------------------\n" ); - m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); + msg( DETAILS, "\n\nB Slices--------------------------------------------------------\n" ); + m_gcAnalyzeB.printOut( 'b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths ); #if WCG_WPSNR if (useLumaWPSNR) { msg(DETAILS, "\nWPSNR SUMMARY --------------------------------------------------------\n"); - m_gcAnalyzeWPSNR.printOut('w', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useLumaWPSNR); + m_gcAnalyzeWPSNR.printOut( 'w', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useLumaWPSNR ); } #endif if (!m_pcCfg->getSummaryOutFilename().empty()) @@ -2825,9 +3350,9 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool m_gcAnalyzeAll_in.setBits(m_gcAnalyzeAll.getBits()); // prior to the above statement, the interlace analyser does not contain the correct total number of bits. 
- msg( DETAILS,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" ); + msg( INFO,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" ); #if ENABLE_QPA - m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR); + m_gcAnalyzeAll_in.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useWPSNR ); #else m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); #endif @@ -2920,7 +3445,7 @@ void EncGOP::xGetBuffer( PicList& rcListPic, while (iterPic != rcListPic.end()) { rpcPic = *(iterPic); - if (rpcPic->getPOC() == pocCurr) + if( rpcPic->getPOC() == pocCurr && rpcPic->layerId == m_pcEncLib->getLayerId() ) { break; } @@ -3001,7 +3526,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const C uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA - , const uint32_t chromaShift /*= 0*/ + , const uint32_t chromaShiftHor /*= 0*/, const uint32_t chromaShiftVer /*= 0*/ #endif ) { @@ -3021,7 +3546,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t W = pic0.width; // image width const uint32_t H = pic0.height; // image height const double R = double(W * H) / (1920.0 * 1080.0); - const uint32_t B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD) + const uint32_t B = Clip3<uint32_t>(0, 128 >> chromaShiftVer, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD) uint32_t x, y; @@ -3056,7 +3581,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, } // integer weighted distortion - sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShift) * (H << chromaShift))) * double(1 << 
BD); + sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShiftHor) * (H << chromaShiftVer))) * double(1 << BD); return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5); } @@ -3094,7 +3619,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0, ComponentID compID, const ChromaFormat chfmt ) { - const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); + const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); if (!useLumaWPSNR) { return 0; @@ -3227,16 +3752,17 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const CPelUnitBuf& pic = cPicD; CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error"); // const CPelUnitBuf& org = (conversion != IPCOLOURSPACE_UNCHANGED) ? pcPic->getPicYuvTrueOrg()->getBuf() : pcPic->getPicYuvOrg()->getBuf(); - const CPelUnitBuf& org = sps.getUseReshaper() ? pcPic->getTrueOrigBuf() : pcPic->getOrigBuf(); + const CPelUnitBuf& org = (sps.getUseLmcs() || m_pcCfg->getGopBasedTemporalFilterEnabled()) ? 
pcPic->getTrueOrigBuf() : pcPic->getOrigBuf(); #if ENABLE_QPA const bool useWPSNR = m_pcEncLib->getUseWPSNR(); #endif double dPSNR[MAX_NUM_COMPONENT]; #if WCG_WPSNR - const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); + const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); double dPSNRWeighted[MAX_NUM_COMPONENT]; double MSEyuvframeWeighted[MAX_NUM_COMPONENT]; #endif + double upscaledPSNR[MAX_NUM_COMPONENT]; for(int i=0; i<MAX_NUM_COMPONENT; i++) { dPSNR[i]=0.0; @@ -3244,7 +3770,17 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni dPSNRWeighted[i]=0.0; MSEyuvframeWeighted[i] = 0.0; #endif + upscaledPSNR[i] = 0.0; + } +#if JVET_O0756_CALCULATE_HDRMETRICS + double deltaE[hdrtoolslib::NB_REF_WHITE]; + double psnrL[hdrtoolslib::NB_REF_WHITE]; + for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++) + { + deltaE[i] = 0.0; + psnrL[i] = 0.0; } +#endif PelStorage interm; @@ -3264,6 +3800,22 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni const bool bPicIsField = pcPic->fieldPic; const Slice* pcSlice = pcPic->slices[0]; + PelStorage upscaledRec; + + if( m_pcEncLib->isRPREnabled() ) + { + const CPelBuf& upscaledOrg = sps.getUseLmcs() ? 
pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT).get( COMPONENT_Y ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT).get( COMPONENT_Y ); + upscaledRec.create( pic.chromaFormat, Area( Position(), upscaledOrg ) ); + + int xScale, yScale; + // it is assumed that full resolution picture PPS has ppsId 0 + const PPS* pps = m_pcEncLib->getPPS(0); + CU::getRprScaling( &sps, pps, pcPic, xScale, yScale ); + std::pair<int, int> scalingRatio = std::pair<int, int>( xScale, yScale ); + + Picture::rescalePicture( scalingRatio, picC, pcPic->getScalingWindow(), upscaledRec, pps->getScalingWindow(), format, sps.getBitDepths(), false, false, sps.getHorCollocatedChromaFlag(), sps.getVerCollocatedChromaFlag() ); + } + for (int comp = 0; comp < ::getNumberValidComponents(formatD); comp++) { const ComponentID compID = ComponentID(comp); @@ -3273,15 +3825,27 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni CHECK(!( p.width == o.width), "Unspecified error"); CHECK(!( p.height == o.height), "Unspecified error"); - const uint32_t width = p.width - (m_pcEncLib->getPad(0) >> ::getComponentScaleX(compID, format)); - const uint32_t height = p.height - (m_pcEncLib->getPad(1) >> (!!bPicIsField+::getComponentScaleY(compID,format))); + int padX = m_pcEncLib->getPad( 0 ); + int padY = m_pcEncLib->getPad( 1 ); + + // when RPR is enabled, picture padding is picture specific due to possible different picture resoluitons, however only full resolution padding is stored in EncLib + // get per picture padding from the conformance window, in this case if conformance window is set not equal to the padding then PSNR results may be inaccurate + if( m_pcEncLib->isRPREnabled() ) + { + Window& conf = pcPic->getConformanceWindow(); + padX = conf.getWindowRightOffset() * SPS::getWinUnitX( format ); + padY = conf.getWindowBottomOffset() * SPS::getWinUnitY( format ); + } + + const uint32_t width = p.width - ( padX >> ::getComponentScaleX( compID, format ) ); + const uint32_t height = p.height - ( 
padY >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) ); // create new buffers with correct dimensions const CPelBuf recPB(p.bufAt(0, 0), p.stride, width, height); const CPelBuf orgPB(o.bufAt(0, 0), o.stride, width, height); const uint32_t bitDepth = sps.getBitDepth(toChannelType(compID)); #if ENABLE_QPA - const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format)); + const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format), ::getComponentScaleY(compID, format)); #else const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0); #endif @@ -3298,12 +3862,43 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni MSEyuvframeWeighted[comp] = (double)uiSSDtempWeighted / size; } #endif + + if( m_pcEncLib->isRPREnabled() ) + { + const CPelBuf& upscaledOrg = sps.getUseLmcs() ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).get( compID ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).get( compID ); + + const uint32_t upscaledWidth = upscaledOrg.width - ( m_pcEncLib->getPad( 0 ) >> ::getComponentScaleX( compID, format ) ); + const uint32_t upscaledHeight = upscaledOrg.height - ( m_pcEncLib->getPad( 1 ) >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) ); + + // create new buffers with correct dimensions + const CPelBuf upscaledRecPB( upscaledRec.get( compID ).bufAt( 0, 0 ), upscaledRec.get( compID ).stride, upscaledWidth, upscaledHeight ); + const CPelBuf upscaledOrgPB( upscaledOrg.bufAt( 0, 0 ), upscaledOrg.stride, upscaledWidth, upscaledHeight ); + +#if ENABLE_QPA + const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRecPB, upscaledOrgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ) ); +#else + const uint64_t scaledSSD = xFindDistortionPlane( upsacledRecPB, upsacledOrgPB, 0 ); +#endif + + upscaledPSNR[comp] = upscaledSSD ? 
10.0 * log10( (double)maxval * maxval * upscaledWidth * upscaledHeight / (double)upscaledSSD ) : 999.99; + } } #if EXTENSION_360_VIDEO m_ext360.calculatePSNRs(pcPic); #endif +#if JVET_O0756_CALCULATE_HDRMETRICS + const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics(); + if (calculateHdrMetrics) + { + auto beforeTime = std::chrono::steady_clock::now(); + xCalculateHDRMetrics(pcPic, deltaE, psnrL); + auto elapsed = std::chrono::steady_clock::now() - beforeTime; + m_metricTime += elapsed; + } +#endif + /* calculate the size of the access unit, excluding: * - any AnnexB contributions (start_code_prefix, zero_byte, etc.,) * - SEI NAL units @@ -3319,11 +3914,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni if( ( *it )->m_nalUnitType != NAL_UNIT_PREFIX_SEI && ( *it )->m_nalUnitType != NAL_UNIT_SUFFIX_SEI ) { numRBSPBytes += numRBSPBytes_nal; -#if HEVC_VPS - if( it == accessUnit.begin() || ( *it )->m_nalUnitType == NAL_UNIT_VPS || ( *it )->m_nalUnitType == NAL_UNIT_SPS || ( *it )->m_nalUnitType == NAL_UNIT_PPS ) -#else - if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS) -#endif + if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_VPS || (*it)->m_nalUnitType == NAL_UNIT_DPS || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS) { numRBSPBytes += 4; } @@ -3339,45 +3930,73 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni //===== add PSNR ===== m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe + , upscaledPSNR , isEncodeLtRef ); #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeAll); +#endif +#if JVET_O0756_CALCULATE_HDRMETRICS + if (calculateHdrMetrics) + { + m_gcAnalyzeAll.addHDRMetricsResult(deltaE, psnrL); + } #endif if (pcSlice->isIntra()) { m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe + , upscaledPSNR , isEncodeLtRef ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if 
EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeI); +#endif +#if JVET_O0756_CALCULATE_HDRMETRICS + if (calculateHdrMetrics) + { + m_gcAnalyzeI.addHDRMetricsResult(deltaE, psnrL); + } #endif } if (pcSlice->isInterP()) { m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe + , upscaledPSNR , isEncodeLtRef ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeP); +#endif +#if JVET_O0756_CALCULATE_HDRMETRICS + if (calculateHdrMetrics) + { + m_gcAnalyzeP.addHDRMetricsResult(deltaE, psnrL); + } #endif } if (pcSlice->isInterB()) { m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe + , upscaledPSNR , isEncodeLtRef ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeB); +#endif +#if JVET_O0756_CALCULATE_HDRMETRICS + if (calculateHdrMetrics) + { + m_gcAnalyzeB.addHDRMetricsResult(deltaE, psnrL); + } #endif } #if WCG_WPSNR if (useLumaWPSNR) { - m_gcAnalyzeWPSNR.addResult(dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, isEncodeLtRef); + m_gcAnalyzeWPSNR.addResult( dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, upscaledPSNR, isEncodeLtRef ); } #endif @@ -3386,11 +4005,13 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni { c += 32; } + if (m_pcCfg->getDependentRAPIndicationSEIEnabled() && pcSlice->isDRAP()) c = 'D'; if( g_verbosity >= NOTICE ) { - msg( NOTICE, "POC %4d TId: %1d ( %c-SLICE, QP %d ) %10d bits", - pcSlice->getPOC() - pcSlice->getLastIDR(), + msg( NOTICE, "POC %4d LId: %2d TId: %1d ( %c-SLICE, QP %d ) %10d bits", + pcSlice->getPOC(), + pcSlice->getPic()->layerId, pcSlice->getTLayer(), c, pcSlice->getSliceQp(), @@ -3426,6 +4047,55 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni if (useLumaWPSNR) { msg(NOTICE, " [WY %6.4lf dB WU %6.4lf dB WV %6.4lf dB]", dPSNRWeighted[COMPONENT_Y], dPSNRWeighted[COMPONENT_Cb], dPSNRWeighted[COMPONENT_Cr]); + + if (m_pcEncLib->getPrintHexPsnr()) + { + uint64_t 
xPsnrWeighted[MAX_NUM_COMPONENT]; + for (int i = 0; i < MAX_NUM_COMPONENT; i++) + { + copy(reinterpret_cast<uint8_t *>(&dPSNRWeighted[i]), + reinterpret_cast<uint8_t *>(&dPSNRWeighted[i]) + sizeof(dPSNRWeighted[i]), + reinterpret_cast<uint8_t *>(&xPsnrWeighted[i])); + } + msg(NOTICE, " [xWY %16" PRIx64 " xWU %16" PRIx64 " xWV %16" PRIx64 "]", xPsnrWeighted[COMPONENT_Y], xPsnrWeighted[COMPONENT_Cb], xPsnrWeighted[COMPONENT_Cr]); + } + } +#endif +#if JVET_O0756_CALCULATE_HDRMETRICS + if(calculateHdrMetrics) + { + for (int i=0; i<1; i++) + { + msg(NOTICE, " [DeltaE%d %6.4lf dB]", (int)m_pcCfg->getWhitePointDeltaE(i), deltaE[i]); + if (m_pcEncLib->getPrintHexPsnr()) + { + int64_t xdeltaE[MAX_NUM_COMPONENT]; + for (int i = 0; i < 1; i++) + { + copy(reinterpret_cast<uint8_t *>(&deltaE[i]), + reinterpret_cast<uint8_t *>(&deltaE[i]) + sizeof(deltaE[i]), + reinterpret_cast<uint8_t *>(&xdeltaE[i])); + } + msg(NOTICE, " [xDeltaE%d %16" PRIx64 "]", (int)m_pcCfg->getWhitePointDeltaE(i), xdeltaE[0]); + } + } + for (int i=0; i<1; i++) + { + msg(NOTICE, " [PSNRL%d %6.4lf dB]", (int)m_pcCfg->getWhitePointDeltaE(i), psnrL[i]); + + if (m_pcEncLib->getPrintHexPsnr()) + { + int64_t xpsnrL[MAX_NUM_COMPONENT]; + for (int i = 0; i < 1; i++) + { + copy(reinterpret_cast<uint8_t *>(&psnrL[i]), + reinterpret_cast<uint8_t *>(&psnrL[i]) + sizeof(psnrL[i]), + reinterpret_cast<uint8_t *>(&xpsnrL[i])); + } + msg(NOTICE, " [xPSNRL%d %16" PRIx64 "]", (int)m_pcCfg->getWhitePointDeltaE(i), xpsnrL[0]); + + } + } } #endif msg( NOTICE, " [ET %5.0f ]", dEncTime ); @@ -3437,10 +4107,36 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni msg( NOTICE, " [L%d ", iRefList ); for( int iRefIndex = 0; iRefIndex < pcSlice->getNumRefIdx( RefPicList( iRefList ) ); iRefIndex++ ) { - msg( NOTICE, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) - pcSlice->getLastIDR() ); + const std::pair<int, int>& scaleRatio = pcSlice->getScalingRatio( RefPicList( iRefList ), iRefIndex 
); + + if( pcPic->cs->picHeader->getEnableTMVPFlag() && pcSlice->getColFromL0Flag() == bool(1 - iRefList) && pcSlice->getColRefIdx() == iRefIndex ) + { + if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS ) + msg( NOTICE, "%dc(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) ); + else + msg( NOTICE, "%dc ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) ); + } + else + { + if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS ) + msg( NOTICE, "%d(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) ); + else + msg( NOTICE, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) ); + } + + if( pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) == pcSlice->getPOC() ) + { + msg( NOTICE, ".%d", pcSlice->getRefPic( RefPicList( iRefList ), iRefIndex )->layerId ); + } + + msg( NOTICE, " " ); } msg( NOTICE, "]" ); } + if( m_pcEncLib->isRPREnabled() ) + { + msg( NOTICE, "\nPSNR2: [Y %6.4lf dB U %6.4lf dB V %6.4lf dB]", upscaledPSNR[COMPONENT_Y], upscaledPSNR[COMPONENT_Cb], upscaledPSNR[COMPONENT_Cr] ); + } } else if( g_verbosity >= INFO ) { @@ -3449,6 +4145,103 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni } } +#if JVET_O0756_CALCULATE_HDRMETRICS +void EncGOP::xCalculateHDRMetrics( Picture* pcPic, double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]) +{ + copyBuftoFrame(pcPic); + + ChromaFormat chFmt = pcPic->chromaFormat; + + if (chFmt != CHROMA_444) + { + m_pcConvertFormat->process(m_ppcFrameOrg[1], m_ppcFrameOrg[0]); + m_pcConvertFormat->process(m_ppcFrameRec[1], m_ppcFrameRec[0]); + } + + 
m_pcConvertIQuantize->process(m_ppcFrameOrg[2], m_ppcFrameOrg[1]); + m_pcConvertIQuantize->process(m_ppcFrameRec[2], m_ppcFrameRec[1]); + + m_pcColorTransform->process(m_ppcFrameOrg[3], m_ppcFrameOrg[2]); + m_pcColorTransform->process(m_ppcFrameRec[3], m_ppcFrameRec[2]); + + m_pcTransferFct->forward(m_ppcFrameOrg[4], m_ppcFrameOrg[3]); + m_pcTransferFct->forward(m_ppcFrameRec[4], m_ppcFrameRec[3]); + + // Calculate the Metrics + m_pcDistortionDeltaE->computeMetric(m_ppcFrameOrg[4], m_ppcFrameRec[4]); + + *deltaE = m_pcDistortionDeltaE->getDeltaE(); + *psnrL = m_pcDistortionDeltaE->getPsnrL(); + +} + +void EncGOP::copyBuftoFrame( Picture* pcPic ) +{ + int cropOffsetLeft = m_pcCfg->getCropOffsetLeft(); + int cropOffsetTop = m_pcCfg->getCropOffsetTop(); + int cropOffsetRight = m_pcCfg->getCropOffsetRight(); + int cropOffsetBottom = m_pcCfg->getCropOffsetBottom(); + + int height = pcPic->getOrigBuf(COMPONENT_Y).height - cropOffsetLeft + cropOffsetRight; + int width = pcPic->getOrigBuf(COMPONENT_Y).width - cropOffsetTop + cropOffsetBottom; + + ChromaFormat chFmt = pcPic->chromaFormat; + + Pel* pOrg = pcPic->getOrigBuf(COMPONENT_Y).buf; + Pel* pRec = pcPic->getRecoBuf(COMPONENT_Y).buf; + + uint16_t* yOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Y_COMP]; + uint16_t* yRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Y_COMP]; + uint16_t* uOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Cb_COMP]; + uint16_t* uRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Cb_COMP]; + uint16_t* vOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Cr_COMP]; + uint16_t* vRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Cr_COMP]; + + if(chFmt == CHROMA_444){ + yOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Y_COMP]; + yRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Y_COMP]; + uOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Cb_COMP]; + uRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Cb_COMP]; + vOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Cr_COMP]; + vRec = 
m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Cr_COMP]; + } + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + yOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Y).stride + j + cropOffsetLeft]); + yRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Y).stride + j + cropOffsetLeft]); + } + } + + if (chFmt != CHROMA_444) { + height >>= 1; + width >>= 1; + cropOffsetLeft >>= 1; + cropOffsetTop >>= 1; + } + + pOrg = pcPic->getOrigBuf(COMPONENT_Cb).buf; + pRec = pcPic->getRecoBuf(COMPONENT_Cb).buf; + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + uOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]); + uRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]); + } + } + + pOrg = pcPic->getOrigBuf(COMPONENT_Cr).buf; + pRec = pcPic->getRecoBuf(COMPONENT_Cr).buf; + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + vOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]); + vRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]); + } + } +} +#endif + void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField, PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField, const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y @@ -3501,7 +4294,7 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* { CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error"); #if ENABLE_QPA - uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? 
bitDepth : 0, ::getComponentScaleX(ch, format) ); + uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format), ::getComponentScaleY(ch, format) ); #else uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), 0 ); #endif @@ -3517,12 +4310,13 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* //===== add PSNR ===== m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe + , MSEyuvframe , isEncodeLtRef ); *PSNR_Y = dPSNR[COMPONENT_Y]; - msg( DETAILS, "\n Interlaced frame %d: [Y %6.4lf dB U %6.4lf dB V %6.4lf dB]", pcPicOrgSecondField->getPOC()/2 , dPSNR[COMPONENT_Y], dPSNR[COMPONENT_Cb], dPSNR[COMPONENT_Cr] ); + msg( INFO, "\n Interlaced frame %d: [Y %6.4lf dB U %6.4lf dB V %6.4lf dB]", pcPicOrgSecondField->getPOC()/2, dPSNR[COMPONENT_Y], dPSNR[COMPONENT_Cb], dPSNR[COMPONENT_Cr] ); if (printFrameMSE) { msg( DETAILS, " [Y MSE %6.4lf U MSE %6.4lf V MSE %6.4lf]", MSEyuvframe[COMPONENT_Y], MSEyuvframe[COMPONENT_Cb], MSEyuvframe[COMPONENT_Cr] ); @@ -3545,17 +4339,13 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField) { if (pocCurr == 0) { - return NAL_UNIT_CODED_SLICE_IDR_W_RADL; + return NAL_UNIT_CODED_SLICE_IDR_N_LP; } if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == (m_pcCfg->getUseCompositeRef() ? 2: 1)) { // to avoid the picture becoming an IRAP -#if !JVET_M0101_HLS - return NAL_UNIT_CODED_SLICE_TRAIL_R; -#else return NAL_UNIT_CODED_SLICE_TRAIL; -#endif } if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % (m_pcCfg->getIntraPeriod() * (m_pcCfg->getUseCompositeRef() ? 
2 : 1)) == 0) @@ -3578,29 +4368,17 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField) // picture can be still decodable when random accessing to a CRA/CRANT/BLA/BLANT picture by // controlling the reference pictures used for encoding that leading picture. Such a leading // picture need not be marked as a TFD picture. -#if !JVET_M0101_HLS - return NAL_UNIT_CODED_SLICE_RASL_R; -#else return NAL_UNIT_CODED_SLICE_RASL; -#endif } } if (lastIDR>0) { if (pocCurr < lastIDR) { -#if !JVET_M0101_HLS - return NAL_UNIT_CODED_SLICE_RADL_R; -#else return NAL_UNIT_CODED_SLICE_RADL; -#endif } } -#if !JVET_M0101_HLS - return NAL_UNIT_CODED_SLICE_TRAIL_R; -#else return NAL_UNIT_CODED_SLICE_TRAIL; -#endif } void EncGOP::xUpdateRasInit(Slice* slice) @@ -3682,112 +4460,6 @@ void EncGOP::xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* c codedSliceData->clear(); } -// Function will arrange the long-term pictures in the decreasing order of poc_lsb_lt, -// and among the pictures with the same lsb, it arranges them in increasing delta_poc_msb_cycle_lt value -void EncGOP::arrangeLongtermPicturesInRPS(Slice *pcSlice, PicList& rcListPic) -{ - if(pcSlice->getRPS()->getNumberOfLongtermPictures() == 0) - { - return; - } - // we can only modify the local RPS! 
- CHECK(!(pcSlice->getRPSidx()==-1), "Unspecified error"); - ReferencePictureSet *rps = pcSlice->getLocalRPS(); - - // Arrange long-term reference pictures in the correct order of LSB and MSB, - // and assign values for pocLSBLT and MSB present flag - int longtermPicsPoc[MAX_NUM_REF_PICS], longtermPicsLSB[MAX_NUM_REF_PICS], indices[MAX_NUM_REF_PICS]; - int longtermPicsMSB[MAX_NUM_REF_PICS]; - bool mSBPresentFlag[MAX_NUM_REF_PICS]; - ::memset(longtermPicsPoc, 0, sizeof(longtermPicsPoc)); // Store POC values of LTRP - ::memset(longtermPicsLSB, 0, sizeof(longtermPicsLSB)); // Store POC LSB values of LTRP - ::memset(longtermPicsMSB, 0, sizeof(longtermPicsMSB)); // Store POC LSB values of LTRP - ::memset(indices , 0, sizeof(indices)); // Indices to aid in tracking sorted LTRPs - ::memset(mSBPresentFlag , 0, sizeof(mSBPresentFlag)); // Indicate if MSB needs to be present - - // Get the long-term reference pictures - int offset = rps->getNumberOfNegativePictures() + rps->getNumberOfPositivePictures(); - int i, ctr = 0; - int maxPicOrderCntLSB = 1 << pcSlice->getSPS()->getBitsForPOC(); - for(i = rps->getNumberOfPictures() - 1; i >= offset; i--, ctr++) - { - longtermPicsPoc[ctr] = rps->getPOC(i); // LTRP POC - longtermPicsLSB[ctr] = getLSB(longtermPicsPoc[ctr], maxPicOrderCntLSB); // LTRP POC LSB - indices[ctr] = i; - longtermPicsMSB[ctr] = longtermPicsPoc[ctr] - longtermPicsLSB[ctr]; - } - int numLongPics = rps->getNumberOfLongtermPictures(); - CHECK(!(ctr == numLongPics), "Unspecified error"); - - // Arrange pictures in decreasing order of MSB; - for(i = 0; i < numLongPics; i++) - { - for(int j = 0; j < numLongPics - 1; j++) - { - if(longtermPicsMSB[j] < longtermPicsMSB[j+1]) - { - std::swap(longtermPicsPoc[j], longtermPicsPoc[j+1]); - std::swap(longtermPicsLSB[j], longtermPicsLSB[j+1]); - std::swap(longtermPicsMSB[j], longtermPicsMSB[j+1]); - std::swap(indices[j] , indices[j+1] ); - } - } - } - - for(i = 0; i < numLongPics; i++) - { - // Check if MSB present flag should 
be enabled. - // Check if the buffer contains any pictures that have the same LSB. - PicList::iterator iterPic = rcListPic.begin(); - Picture* pcPic; - while ( iterPic != rcListPic.end() ) - { - pcPic = *iterPic; - if( (getLSB(pcPic->getPOC(), maxPicOrderCntLSB) == longtermPicsLSB[i]) && // Same LSB - (pcPic->referenced) && // Reference picture - (pcPic->getPOC() != longtermPicsPoc[i]) ) // Not the LTRP itself - { - mSBPresentFlag[i] = true; - break; - } - iterPic++; - } - } - - // tempArray for usedByCurr flag - bool tempArray[MAX_NUM_REF_PICS]; ::memset(tempArray, 0, sizeof(tempArray)); - for(i = 0; i < numLongPics; i++) - { - tempArray[i] = rps->getUsed(indices[i]); - } - // Now write the final values; - ctr = 0; - int currMSB = 0, currLSB = 0; - // currPicPoc = currMSB + currLSB - currLSB = getLSB(pcSlice->getPOC(), maxPicOrderCntLSB); - currMSB = pcSlice->getPOC() - currLSB; - - for(i = rps->getNumberOfPictures() - 1; i >= offset; i--, ctr++) - { - rps->setPOC (i, longtermPicsPoc[ctr]); - rps->setDeltaPOC (i, - pcSlice->getPOC() + longtermPicsPoc[ctr]); - rps->setUsed (i, tempArray[ctr]); - rps->setPocLSBLT (i, longtermPicsLSB[ctr]); - rps->setDeltaPocMSBCycleLT (i, (currMSB - (longtermPicsPoc[ctr] - longtermPicsLSB[ctr])) / maxPicOrderCntLSB); - rps->setDeltaPocMSBPresentFlag(i, mSBPresentFlag[ctr]); - - CHECK(!(rps->getDeltaPocMSBCycleLT(i) >= 0), "Unspecified error"); // Non-negative value - } - for(i = rps->getNumberOfPictures() - 1, ctr = 1; i >= offset; i--, ctr++) - { - for(int j = rps->getNumberOfPictures() - 1 - ctr; j >= offset; j--) - { - // Here at the encoder we know that we have set the full POC value for the LTRPs, hence we - // don't have to check the MSB present flag values for this constraint. - CHECK(!( rps->getPOC(i) != rps->getPOC(j) ), "Unspecified error"); // If assert fails, LTRP entry repeated in RPS!!! 
- } - } -} void EncGOP::arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr) { @@ -4014,11 +4686,7 @@ void EncGOP::applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices ) Pel* tempRec = Rec; const Slice* pcSlice = pcPic->slices[0]; -#if MAX_TB_SIZE_SIGNALLING const uint32_t log2maxTB = pcSlice->getSPS()->getLog2MaxTbSize(); -#else - const uint32_t log2maxTB = MAX_TB_LOG2_SIZEY; -#endif const uint32_t maxTBsize = (1<<log2maxTB); const uint32_t minBlockArtSize = 8; const uint32_t noCol = (picWidth>>log2maxTB); @@ -4262,4 +4930,273 @@ void EncGOP::applyDeblockingFilterParameterSelection( Picture* pcPic, const uint } } #endif + +void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicList& rcListPic, const ReferencePictureList *rpl0, const ReferencePictureList *rpl1 ) +{ + Picture* rpcPic; + int pocCycle = 0; + + Picture* pic = slice->getPic(); + const VPS* vps = slice->getPic()->cs->vps; + int layerIdx = vps == nullptr ? 0 : vps->getGeneralLayerIdx( pic->layerId ); + + ReferencePictureList* pLocalRPL0 = slice->getLocalRPL0(); + *pLocalRPL0 = ReferencePictureList( slice->getSPS()->getInterLayerPresentFlag() ); + + uint32_t numOfSTRPL0 = 0; + uint32_t numOfLTRPL0 = 0; + uint32_t numOfILRPL0 = 0; + uint32_t numOfRefPic = rpl0->getNumberOfShorttermPictures() + rpl0->getNumberOfLongtermPictures(); + uint32_t refPicIdxL0 = 0; + + for( int ii = 0; ii < numOfRefPic; ii++ ) + { + // loop through all pictures in the reference picture buffer + PicList::iterator iterPic = rcListPic.begin(); + bool isAvailable = false; + + pocCycle = 1 << ( slice->getSPS()->getBitsForPOC() ); + while( iterPic != rcListPic.end() ) + { + rpcPic = *( iterPic++ ); + + if( rpcPic->layerId == pic->layerId ) + { + if( !rpl0->isRefPicLongterm( ii ) && rpcPic->referenced && rpcPic->getPOC() == slice->getPOC() - rpl0->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) ) + { + isAvailable = true; + 
break; + } + else if( rpl0->isRefPicLongterm( ii ) && rpcPic->referenced && ( rpcPic->getPOC() & ( pocCycle - 1 ) ) == rpl0->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) ) + { + isAvailable = true; + break; + } + } + } + + if( isAvailable ) + { + pLocalRPL0->setRefPicIdentifier( refPicIdxL0, rpl0->getRefPicIdentifier( ii ), rpl0->isRefPicLongterm( ii ), false, NOT_VALID ); + refPicIdxL0++; + numOfSTRPL0 = numOfSTRPL0 + ( ( rpl0->isRefPicLongterm( ii ) ) ? 0 : 1 ); + numOfLTRPL0 += ( rpl0->isRefPicLongterm( ii ) && !rpl0->isInterLayerRefPic( ii ) ) ? 1 : 0; + isAvailable = false; + } + } + + // inter-layer reference pictures are added to the end of the reference picture list + if( layerIdx && vps && !vps->getAllIndependentLayersFlag() ) + { + numOfRefPic = rpl0->getNumberOfInterLayerPictures() ? rpl0->getNumberOfInterLayerPictures() : m_pcEncLib->getNumRefLayers( layerIdx ); + + for( int ii = 0; ii < numOfRefPic; ii++ ) + { + // loop through all pictures in the reference picture buffer + PicList::iterator iterPic = rcListPic.begin(); + + while( iterPic != rcListPic.end() ) + { + rpcPic = *( iterPic++ ); + int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId ); + + if( rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag( layerIdx, refLayerIdx ) ) + { + pLocalRPL0->setRefPicIdentifier( refPicIdxL0, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) ); + refPicIdxL0++; + numOfILRPL0++; + break; + } + } + } + } + + if( slice->getEnableDRAPSEI() ) + { + pLocalRPL0->setNumberOfShorttermPictures( numOfSTRPL0 ); + pLocalRPL0->setNumberOfLongtermPictures( numOfLTRPL0 ); + pLocalRPL0->setNumberOfInterLayerPictures( numOfILRPL0 ); + + if( !slice->isIRAP() && !slice->isPOCInRefPicList( pLocalRPL0, slice->getAssociatedIRAPPOC() ) ) + { + if( slice->getUseLTforDRAP() && !slice->isPOCInRefPicList( rpl1, slice->getAssociatedIRAPPOC() ) ) + { + // Adding associated IRAP as 
longterm picture + pLocalRPL0->setRefPicIdentifier( refPicIdxL0, slice->getAssociatedIRAPPOC(), true, false, 0 ); + refPicIdxL0++; + numOfLTRPL0++; + } + else + { + // Adding associated IRAP as shortterm picture + pLocalRPL0->setRefPicIdentifier( refPicIdxL0, slice->getPOC() - slice->getAssociatedIRAPPOC(), false, false, 0 ); + refPicIdxL0++; + numOfSTRPL0++; + } + } + } + + ReferencePictureList* pLocalRPL1 = slice->getLocalRPL1(); + *pLocalRPL1 = ReferencePictureList( slice->getSPS()->getInterLayerPresentFlag() ); + + uint32_t numOfSTRPL1 = 0; + uint32_t numOfLTRPL1 = 0; + uint32_t numOfILRPL1 = 0; + numOfRefPic = rpl1->getNumberOfShorttermPictures() + rpl1->getNumberOfLongtermPictures(); + uint32_t refPicIdxL1 = 0; + + for( int ii = 0; ii < numOfRefPic; ii++ ) + { + // loop through all pictures in the reference picture buffer + PicList::iterator iterPic = rcListPic.begin(); + bool isAvailable = false; + pocCycle = 1 << ( slice->getSPS()->getBitsForPOC() ); + while( iterPic != rcListPic.end() ) + { + rpcPic = *( iterPic++ ); + + if( rpcPic->layerId == pic->layerId ) + { + if( !rpl1->isRefPicLongterm( ii ) && rpcPic->referenced && rpcPic->getPOC() == slice->getPOC() - rpl1->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) ) + { + isAvailable = true; + break; + } + else if( rpl1->isRefPicLongterm( ii ) && rpcPic->referenced && ( rpcPic->getPOC() & ( pocCycle - 1 ) ) == rpl1->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) ) + { + isAvailable = true; + break; + } + } + } + + if( isAvailable ) + { + pLocalRPL1->setRefPicIdentifier( refPicIdxL1, rpl1->getRefPicIdentifier( ii ), rpl1->isRefPicLongterm( ii ), false, NOT_VALID ); + refPicIdxL1++; + numOfSTRPL1 = numOfSTRPL1 + ( ( rpl1->isRefPicLongterm( ii ) ) ? 0 : 1 ); + numOfLTRPL1 += ( rpl1->isRefPicLongterm( ii ) && !rpl1->isInterLayerRefPic( ii ) ) ? 
1 : 0; + isAvailable = false; + } + } + + + // inter-layer reference pictures are added to the end of the reference picture list + if( layerIdx && vps && !vps->getAllIndependentLayersFlag() ) + { + numOfRefPic = rpl1->getNumberOfInterLayerPictures() ? rpl1->getNumberOfInterLayerPictures() : m_pcEncLib->getNumRefLayers( layerIdx ); + + for( int ii = 0; ii < numOfRefPic; ii++ ) + { + // loop through all pictures in the reference picture buffer + PicList::iterator iterPic = rcListPic.begin(); + + while( iterPic != rcListPic.end() ) + { + rpcPic = *( iterPic++ ); + int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId ); + + if( rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag( layerIdx, refLayerIdx ) ) + { + pLocalRPL1->setRefPicIdentifier( refPicIdxL1, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) ); + refPicIdxL1++; + numOfILRPL1++; + break; + } + } + } + } + + //Copy from L1 if we have less than active ref pic + int numOfNeedToFill = rpl0->getNumberOfActivePictures() - (numOfLTRPL0 + numOfSTRPL0); + bool isDisallowMixedRefPic = ( slice->getSPS()->getAllActiveRplEntriesHasSameSignFlag() ) ? 
true : false; + int originalL0StrpNum = numOfSTRPL0; + int originalL0LtrpNum = numOfLTRPL0; + int originalL0IlrpNum = numOfILRPL0; + + for( int ii = 0; numOfNeedToFill > 0 && ii < ( pLocalRPL1->getNumberOfLongtermPictures() + pLocalRPL1->getNumberOfShorttermPictures() + pLocalRPL1->getNumberOfInterLayerPictures() ); ii++ ) + { + if( ii <= ( numOfLTRPL1 + numOfSTRPL1 + numOfILRPL1 - 1 ) ) + { + //Make sure this copy is not already in L0 + bool canIncludeThis = true; + for( int jj = 0; jj < refPicIdxL0; jj++ ) + { + if( ( pLocalRPL1->getRefPicIdentifier( ii ) == pLocalRPL0->getRefPicIdentifier( jj ) ) && ( pLocalRPL1->isRefPicLongterm( ii ) == pLocalRPL0->isRefPicLongterm( jj ) ) && pLocalRPL1->getInterLayerRefPicIdx( ii ) == pLocalRPL0->getInterLayerRefPicIdx( jj ) ) + { + canIncludeThis = false; + } + + bool sameSign = ( pLocalRPL1->getRefPicIdentifier( ii ) > 0 ) == ( pLocalRPL0->getRefPicIdentifier( 0 ) > 0 ); + + if( isDisallowMixedRefPic && canIncludeThis && !pLocalRPL1->isRefPicLongterm( ii ) && !sameSign ) + { + canIncludeThis = false; + } + } + if( canIncludeThis ) + { + pLocalRPL0->setRefPicIdentifier( refPicIdxL0, pLocalRPL1->getRefPicIdentifier( ii ), pLocalRPL1->isRefPicLongterm( ii ), pLocalRPL1->isInterLayerRefPic( ii ), pLocalRPL1->getInterLayerRefPicIdx( ii ) ); + refPicIdxL0++; + numOfSTRPL0 = numOfSTRPL0 + ( ( pLocalRPL1->isRefPicLongterm( ii ) ) ? 0 : 1 ); + numOfLTRPL0 += ( pLocalRPL1->isRefPicLongterm( ii ) && !pLocalRPL1->isInterLayerRefPic( ii ) ) ? 1 : 0; + numOfILRPL0 += pLocalRPL1->isInterLayerRefPic( ii ) ? 1 : 0; + numOfNeedToFill--; + } + } + } + pLocalRPL0->setNumberOfLongtermPictures( numOfLTRPL0 ); + pLocalRPL0->setNumberOfShorttermPictures( numOfSTRPL0 ); + pLocalRPL0->setNumberOfInterLayerPictures( numOfILRPL0 ); + int numPics = numOfLTRPL0 + numOfSTRPL0; + + pLocalRPL0->setNumberOfActivePictures( ( numPics < rpl0->getNumberOfActivePictures() ? 
numPics : rpl0->getNumberOfActivePictures() ) + numOfILRPL0 ); + pLocalRPL0->setLtrpInSliceHeaderFlag( rpl0->getLtrpInSliceHeaderFlag() ); + slice->setRPL0idx( -1 ); + slice->setRPL0( pLocalRPL0 ); + + //Copy from L0 if we have less than active ref pic + numOfNeedToFill = pLocalRPL0->getNumberOfActivePictures() - ( numOfLTRPL1 + numOfSTRPL1 ); + + for( int ii = 0; numOfNeedToFill > 0 && ii < ( pLocalRPL0->getNumberOfLongtermPictures() + pLocalRPL0->getNumberOfShorttermPictures() + pLocalRPL0->getNumberOfInterLayerPictures() ); ii++ ) + { + if( ii <= ( originalL0StrpNum + originalL0LtrpNum + originalL0IlrpNum - 1 ) ) + { + //Make sure this copy is not already in L0 + bool canIncludeThis = true; + for( int jj = 0; jj < refPicIdxL1; jj++ ) + { + if( ( pLocalRPL0->getRefPicIdentifier( ii ) == pLocalRPL1->getRefPicIdentifier( jj ) ) && ( pLocalRPL0->isRefPicLongterm( ii ) == pLocalRPL1->isRefPicLongterm( jj ) ) && pLocalRPL0->getInterLayerRefPicIdx( ii ) == pLocalRPL1->getInterLayerRefPicIdx( jj ) ) + { + canIncludeThis = false; + } + + bool sameSign = ( pLocalRPL0->getRefPicIdentifier( ii ) > 0 ) == ( pLocalRPL1->getRefPicIdentifier( 0 ) > 0 ); + + if( isDisallowMixedRefPic && canIncludeThis && !pLocalRPL0->isRefPicLongterm( ii ) && !sameSign ) + { + canIncludeThis = false; + } + } + if( canIncludeThis ) + { + pLocalRPL1->setRefPicIdentifier( refPicIdxL1, pLocalRPL0->getRefPicIdentifier( ii ), pLocalRPL0->isRefPicLongterm( ii ), pLocalRPL0->isInterLayerRefPic( ii ), pLocalRPL0->getInterLayerRefPicIdx( ii ) ); + refPicIdxL1++; + numOfSTRPL1 = numOfSTRPL1 + ( ( pLocalRPL0->isRefPicLongterm( ii ) ) ? 0 : 1 ); + numOfLTRPL1 += ( pLocalRPL0->isRefPicLongterm( ii ) && !pLocalRPL0->isInterLayerRefPic( ii ) ) ? 1 : 0; + numOfLTRPL1 += pLocalRPL0->isInterLayerRefPic( ii ) ? 
1 : 0; + numOfNeedToFill--; + } + } + } + pLocalRPL1->setNumberOfLongtermPictures( numOfLTRPL1 ); + pLocalRPL1->setNumberOfShorttermPictures( numOfSTRPL1 ); + pLocalRPL1->setNumberOfInterLayerPictures( numOfILRPL1 ); + numPics = numOfLTRPL1 + numOfSTRPL1; + + pLocalRPL1->setNumberOfActivePictures( ( isDisallowMixedRefPic ? numPics : ( numPics < rpl1->getNumberOfActivePictures() ? numPics : rpl1->getNumberOfActivePictures() ) ) + numOfILRPL1 ); + pLocalRPL1->setLtrpInSliceHeaderFlag( rpl1->getLtrpInSliceHeaderFlag() ); + slice->setRPL1idx( -1 ); + slice->setRPL1( pLocalRPL1 ); +} //! \} diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h index f0c85ca4d8afe14efe94d85bf611499c3565564f..36d28e5145f3f48c470e2208b5905491ed4754ee 100644 --- a/source/Lib/EncoderLib/EncGOP.h +++ b/source/Lib/EncoderLib/EncGOP.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,16 @@ #include "Analyze.h" #include "RateCtrl.h" #include <vector> +#include "EncHRD.h" + +#if JVET_O0756_CALCULATE_HDRMETRICS +#include "HDRLib/inc/ConvertColorFormat.H" +#include "HDRLib/inc/Convert.H" +#include "HDRLib/inc/ColorTransform.H" +#include "HDRLib/inc/TransferFunction.H" +#include "HDRLib/inc/DistortionMetricDeltaE.H" +#include <chrono> +#endif //! \ingroup EncoderLib //! 
\{ @@ -117,6 +127,7 @@ private: int m_iNumPicCoded; bool m_bFirst; int m_iLastRecoveryPicPOC; + int m_latestDRAPPOC; int m_lastRasPoc; // Access channel @@ -145,6 +156,8 @@ private: // indicate sequence first bool m_bSeqFirst; + EncHRD* m_HRD; + // clean decoding refresh bool m_bRefreshPending; int m_pocCRA; @@ -152,8 +165,9 @@ private: int m_associatedIRAPPOC; std::vector<int> m_vRVM_RP; - uint32_t m_lastBPSEI; - uint32_t m_totalCoded; + uint32_t m_lastBPSEI[MAX_TLAYER]; + uint32_t m_totalCoded[MAX_TLAYER]; + bool m_rapWithLeading; bool m_bufferingPeriodSEIPresentInAU; SEIEncoder m_seiEncoder; #if W0038_DB_OPT @@ -169,6 +183,23 @@ private: AUWriterIf* m_AUWriterIf; +#if JVET_O0756_CALCULATE_HDRMETRICS + + hdrtoolslib::Frame **m_ppcFrameOrg; + hdrtoolslib::Frame **m_ppcFrameRec; + + hdrtoolslib::ConvertColorFormat *m_pcConvertFormat; + hdrtoolslib::Convert *m_pcConvertIQuantize; + hdrtoolslib::ColorTransform *m_pcColorTransform; + hdrtoolslib::DistortionMetricDeltaE *m_pcDistortionDeltaE; + hdrtoolslib::TransferFunction *m_pcTransferFct; + + hdrtoolslib::ColorTransformParams *m_pcColorTransformParams; + hdrtoolslib::FrameFormat *m_pcFrameFormat; + + std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime; +#endif + public: EncGOP(); virtual ~EncGOP(); @@ -177,9 +208,11 @@ public: void destroy (); void init ( EncLib* pcEncLib ); + void compressGOP ( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRec, bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE , bool isEncodeLtRef + , const int picIdInGOP ); void xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* pcBitstreamRedirect); @@ -202,13 +235,12 @@ public: void setLastLTRefPoc(int iLastLTRefPoc) { m_lastLTRefPoc = iLastLTRefPoc; } int getLastLTRefPoc() const { return m_lastLTRefPoc; } - void printOutSummary ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool 
printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths ); + void printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths ); #if W0038_DB_OPT uint64_t preLoopFilterPicAndCalcDist( Picture* pcPic ); #endif EncSlice* getSliceEncoder() { return m_pcSliceEncoder; } NalUnitType getNalUnitType( int pocCurr, int lastIdr, bool isField ); - void arrangeLongtermPicturesInRPS(Slice *, PicList& ); void arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr); void updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr); @@ -218,6 +250,9 @@ public: Analyze& getAnalyzePData() { return m_gcAnalyzeP; } Analyze& getAnalyzeBData() { return m_gcAnalyzeB; } #endif +#if JVET_O0756_CALCULATE_HDRMETRICS + std::chrono::duration<long long, ratio<1, 1000000000>> getMetricTime() const { return m_metricTime; }; +#endif protected: RateCtrl* getRateCtrl() { return m_pcRateCtrl; } @@ -227,9 +262,17 @@ protected: void xInitGOP ( int iPOCLast, int iNumPicRcvd, bool isField , bool isEncodeLtRef ); + void xPicInitHashME( Picture *pic, const PPS *pps, PicList &rcListPic ); + void xPicInitRateControl(int &estimatedBits, int gopId, double &lambda, Picture *pic, Slice *slice); + void xPicInitLMCS (Picture *pic, PicHeader *picHeader, Slice *slice); void xGetBuffer ( PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int iNumPicRcvd, int iTimeOffset, Picture*& rpcPic, int pocCurr, bool isField ); +#if JVET_O0756_CALCULATE_HDRMETRICS + void xCalculateHDRMetrics ( Picture* pcPic, double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]); + void copyBuftoFrame ( Picture* pcPic ); +#endif + void xCalculateAddPSNRs(const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const 
InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y , bool isEncodeLtRef ); @@ -244,7 +287,7 @@ protected: uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA - , const uint32_t chromaShift = 0 + , const uint32_t chromaShiftHor = 0, const uint32_t chromaShiftVer = 0 #endif ); #if WCG_WPSNR @@ -258,12 +301,15 @@ protected: void xCreateIRAPLeadingSEIMessages (SEIMessages& seiMessages, const SPS *sps, const PPS *pps); void xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, Slice *slice); + void xCreateFrameFieldInfoSEI (SEIMessages& seiMessages, Slice *slice, bool isField); void xCreatePictureTimingSEI (int IRAPGOPid, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, SEIMessages& duInfoSeiMessages, Slice *slice, bool isField, std::deque<DUData> &duData); void xUpdateDuData(AccessUnit &testAU, std::deque<DUData> &duData); void xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUData> &duData, const SPS *sps); - void xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI); + void xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI, int maxSubLayers); +#if HEVC_SEI void xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& nestedSeiMessages); +#endif void xWriteSEI (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId, const SPS *sps); void xWriteSEISeparately (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId, const SPS *sps); void xClearSEIs(SEIMessages& seiMessages, bool deleteMessages); @@ -272,18 +318,19 @@ protected: void xWriteTrailingSEIMessages (SEIMessages& seiMessages, AccessUnit &accessUnit, int temporalId, const SPS *sps); void xWriteDuSEIMessages (SEIMessages& duInfoSeiMessages, AccessUnit 
&accessUnit, int temporalId, const SPS *sps, std::deque<DUData> &duData); -#if HEVC_VPS int xWriteVPS (AccessUnit &accessUnit, const VPS *vps); -#endif - int xWriteSPS (AccessUnit &accessUnit, const SPS *sps); - int xWritePPS (AccessUnit &accessUnit, const PPS *pps); - int xWriteAPS(AccessUnit &accessUnit, APS *aps); + int xWriteDPS (AccessUnit &accessUnit, const DPS *dps); + int xWriteSPS( AccessUnit &accessUnit, const SPS *sps, const int layerId = 0 ); + int xWritePPS( AccessUnit &accessUnit, const PPS *pps, const SPS *sps, const int layerId = 0 ); + int xWriteAPS( AccessUnit &accessUnit, APS *aps, const int layerId, const bool isPrefixNUT ); int xWriteParameterSets (AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst); + int xWritePicHeader( AccessUnit &accessUnit, PicHeader *picHeader ); void applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices ); #if W0038_DB_OPT void applyDeblockingFilterParameterSelection( Picture* pcPic, const uint32_t numSlices, const int gopID ); #endif + void xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicList& rcListPic, const ReferencePictureList *rpl0, const ReferencePictureList *rpl1 ); };// END CLASS DEFINITION EncGOP //! \} diff --git a/source/Lib/EncoderLib/EncHRD.cpp b/source/Lib/EncoderLib/EncHRD.cpp new file mode 100644 index 0000000000000000000000000000000000000000..342e9a747a550583d5e7400289f140c7e23ee167 --- /dev/null +++ b/source/Lib/EncoderLib/EncHRD.cpp @@ -0,0 +1,190 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "EncHRD.h" + +#if U0132_TARGET_BITS_SATURATION + +// calculate scale value of bitrate and initial delay +int EncHRD::xCalcScale(int x) +{ + if (x==0) + { + return 0; + } + uint32_t mask = 0xffffffff; + int scaleValue = 32; + + while ((x&mask) != 0) + { + scaleValue--; + mask = (mask >> 1); + } + + return scaleValue; +} +#endif + +void EncHRD::initHRDParameters (EncCfg* encCfg) +{ + bool useSubCpbParams = encCfg->getNoPicPartitionFlag() == false; + int bitRate = encCfg->getTargetBitrate(); +# if U0132_TARGET_BITS_SATURATION + int cpbSize = encCfg->getCpbSize(); + CHECK(!(cpbSize!=0), "Unspecified error"); // CPB size may not be equal to zero. ToDo: have a better default and check for level constraints + if( !encCfg->getHrdParametersPresentFlag() && !encCfg->getCpbSaturationEnabled() ) +#else + if( !encCfg->getHrdParametersPresentFlag() ) +#endif + { + return; + } + + m_timingInfo.setTimingInfoPresentFlag( true ); + switch( encCfg->getFrameRate() ) + { + case 24: + m_timingInfo.setNumUnitsInTick( 1125000 ); m_timingInfo.setTimeScale ( 27000000 ); + break; + case 25: + m_timingInfo.setNumUnitsInTick( 1080000 ); m_timingInfo.setTimeScale ( 27000000 ); + break; + case 30: + m_timingInfo.setNumUnitsInTick( 900900 ); m_timingInfo.setTimeScale ( 27000000 ); + break; + case 50: + m_timingInfo.setNumUnitsInTick( 540000 ); m_timingInfo.setTimeScale ( 27000000 ); + break; + case 60: + m_timingInfo.setNumUnitsInTick( 450450 ); m_timingInfo.setTimeScale ( 27000000 ); + break; + default: + m_timingInfo.setNumUnitsInTick( 1001 ); m_timingInfo.setTimeScale ( 60000 ); + break; + } + + if (encCfg->getTemporalSubsampleRatio()>1) + { + uint32_t temporalSubsampleRatio = encCfg->getTemporalSubsampleRatio(); + if ( double(m_timingInfo.getNumUnitsInTick()) * temporalSubsampleRatio > std::numeric_limits<uint32_t>::max() ) + { + m_timingInfo.setTimeScale( m_timingInfo.getTimeScale() / temporalSubsampleRatio ); + } + else + { + m_timingInfo.setNumUnitsInTick( 
m_timingInfo.getNumUnitsInTick() * temporalSubsampleRatio ); + } + } + bool rateCnt = ( bitRate > 0 ); + m_hrdParams.setNalHrdParametersPresentFlag( rateCnt ); + m_hrdParams.setVclHrdParametersPresentFlag( rateCnt ); + useSubCpbParams &= ( m_hrdParams.getNalHrdParametersPresentFlag() || m_hrdParams.getVclHrdParametersPresentFlag() ); + m_hrdParams.setGeneralDecodingUnitHrdParamsPresentFlag( useSubCpbParams ); + + if( m_hrdParams.getGeneralDecodingUnitHrdParamsPresentFlag() ) + { + m_hrdParams.setTickDivisorMinus2( 100 - 2 ); + } + +#if U0132_TARGET_BITS_SATURATION + if (xCalcScale(bitRate) <= 6) + { + m_hrdParams.setBitRateScale(0); + } + else + { + m_hrdParams.setBitRateScale(xCalcScale(bitRate) - 6); + } + + if (xCalcScale(cpbSize) <= 4) + { + m_hrdParams.setCpbSizeScale(0); + } + else + { + m_hrdParams.setCpbSizeScale(xCalcScale(cpbSize) - 4); + } +#else + m_hrdParams.setBitRateScale( 4 ); // in units of 2^( 6 + 4 ) = 1,024 bps + m_hrdParams.setCpbSizeScale( 6 ); // in units of 2^( 4 + 6 ) = 1,024 bit +#endif + + m_hrdParams.setCpbSizeDuScale( 6 ); // in units of 2^( 4 + 6 ) = 1,024 bit + + + // Note: parameters for all temporal layers are initialized with the same values + int i, j; + uint32_t bitrateValue, cpbSizeValue; + uint32_t duCpbSizeValue; + uint32_t duBitRateValue = 0; + + for( i = 0; i < MAX_TLAYER; i ++ ) + { + m_hrdParams.setFixedPicRateFlag( i, 1 ); + m_hrdParams.setPicDurationInTcMinus1( i, 0 ); + m_hrdParams.setLowDelayHrdFlag( i, 0 ); + m_hrdParams.setCpbCntMinus1( i, 0 ); + + //! 
\todo check for possible PTL violations + // BitRate[ i ] = ( bit_rate_value_minus1[ i ] + 1 ) * 2^( 6 + bit_rate_scale ) + bitrateValue = bitRate / (1 << (6 + m_hrdParams.getBitRateScale()) ); // bitRate is in bits, so it needs to be scaled down + // CpbSize[ i ] = ( cpb_size_value_minus1[ i ] + 1 ) * 2^( 4 + cpb_size_scale ) +#if U0132_TARGET_BITS_SATURATION + cpbSizeValue = cpbSize / (1 << (4 + m_hrdParams.getCpbSizeScale()) ); // using bitRate results in 1 second CPB size +#else + cpbSizeValue = bitRate / (1 << (4 + m_hrdParams.getCpbSizeScale()) ); // using bitRate results in 1 second CPB size +#endif + + + // DU CPB size could be smaller (i.e. bitrateValue / number of DUs), but we don't know + // in how many DUs the slice segment settings will result + duCpbSizeValue = bitrateValue; + duBitRateValue = cpbSizeValue; + + for( j = 0; j < ( m_hrdParams.getCpbCntMinus1( i ) + 1 ); j ++ ) + { + m_hrdParams.setBitRateValueMinus1( i, j, 0, ( bitrateValue - 1 ) ); + m_hrdParams.setCpbSizeValueMinus1( i, j, 0, ( cpbSizeValue - 1 ) ); + m_hrdParams.setDuCpbSizeValueMinus1( i, j, 0, ( duCpbSizeValue - 1 ) ); + m_hrdParams.setDuBitRateValueMinus1( i, j, 0, ( duBitRateValue - 1 ) ); + m_hrdParams.setCbrFlag( i, j, 0, false ); + + m_hrdParams.setBitRateValueMinus1( i, j, 1, ( bitrateValue - 1) ); + m_hrdParams.setCpbSizeValueMinus1( i, j, 1, ( cpbSizeValue - 1 ) ); + m_hrdParams.setDuCpbSizeValueMinus1( i, j, 1, ( duCpbSizeValue - 1 ) ); + m_hrdParams.setDuBitRateValueMinus1( i, j, 1, ( duBitRateValue - 1 ) ); + m_hrdParams.setCbrFlag( i, j, 1, false ); + } + } +} + diff --git a/source/Lib/EncoderLib/EncHRD.h b/source/Lib/EncoderLib/EncHRD.h new file mode 100644 index 0000000000000000000000000000000000000000..1aa7c695f74ec6e08318ca22d23a44d044138fee --- /dev/null +++ b/source/Lib/EncoderLib/EncHRD.h @@ -0,0 +1,55 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. 
This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#ifndef __ENCHRD__ +#define __ENCHRD__ + +#include "CommonLib/Common.h" +#include "CommonLib/HRD.h" +#include "CommonLib/Slice.h" +#include "EncCfg.h" + +class EncHRD:public HRD +{ +public: + void initHRDParameters (EncCfg* encCfg); + +protected: + // calculate scale value of bitrate and initial delay + int xCalcScale(int x); + +}; + + +#endif // __ENCHRD__ diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index dcc383dc2ce7716b14edc62e0450bbb045d32ef4..4df4089a5c102e6b8800eb2e689c8f9caab4d438 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,7 +34,6 @@ /** \file EncLib.cpp \brief encoder class */ - #include "EncLib.h" #include "EncModeCtrl.h" @@ -47,6 +46,7 @@ #if ENABLE_SPLIT_PARALLELISM #include <omp.h> #endif +#include "EncLibCommon.h" //! \ingroup EncoderLib //! 
\{ @@ -55,16 +55,19 @@ // Constructor / destructor / create / destroy // ==================================================================================================================== - - -EncLib::EncLib() - : m_spsMap( MAX_NUM_SPS ) - , m_ppsMap( MAX_NUM_PPS ) - , m_apsMap( MAX_NUM_APS ) +EncLib::EncLib( EncLibCommon* encLibCommon ) + : m_cListPic( encLibCommon->getPictureBuffer() ) + , m_cEncALF( encLibCommon->getApsIdStart() ) + , m_spsMap( encLibCommon->getSpsMap() ) + , m_ppsMap( encLibCommon->getPpsMap() ) + , m_apsMap( encLibCommon->getApsMap() ) , m_AUWriterIf( nullptr ) #if JVET_J0090_MEMORY_BANDWITH_MEASURE , m_cacheModel() #endif + , m_lmcsAPS(nullptr) + , m_scalinglistAPS( nullptr ) + , m_doPlt( true ) { m_iPOCLast = -1; m_iNumPicRcvd = 0; @@ -75,30 +78,33 @@ EncLib::EncLib() #if ENABLE_SIMD_OPT_BUFFER g_pelBufOP.initPelBufOpsX86(); #endif + +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = std::chrono::milliseconds(0); +#endif + + memset(m_apss, 0, sizeof(m_apss)); + + m_layerId = NOT_VALID; + m_picIdInGOP = NOT_VALID; } EncLib::~EncLib() { } -void EncLib::create () +void EncLib::create( const int layerId ) { - // initialize global variables - initROM(); - TComHash::initBlockSizeToIndex(); + m_layerId = layerId; m_iPOCLast = m_compositeRefEnabled ? -2 : -1; // create processing unit classes m_cGOPEncoder. create( ); - m_cSliceEncoder. create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth ); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM #if ENABLE_SPLIT_PARALLELISM m_numCuEncStacks = m_numSplitThreads == 1 ? 
1 : NUM_RESERVERD_SPLIT_JOBS; #else m_numCuEncStacks = 1; #endif -#if ENABLE_WPP_PARALLELISM - m_numCuEncStacks *= ( m_numWppThreads + m_numWppExtraLines ); -#endif m_cCuEncoder = new EncCu [m_numCuEncStacks]; m_cInterSearch = new InterSearch [m_numCuEncStacks]; @@ -118,32 +124,15 @@ void EncLib::create () #if JVET_J0090_MEMORY_BANDWITH_MEASURE m_cInterSearch.cacheAssign( &m_cacheModel ); #endif - const uint32_t widthInCtus = (getSourceWidth() + m_maxCUWidth - 1) / m_maxCUWidth; - const uint32_t heightInCtus = (getSourceHeight() + m_maxCUHeight - 1) / m_maxCUHeight; - const uint32_t numCtuInFrame = widthInCtus * heightInCtus; - - if (m_bUseSAO) - { - m_cEncSAO.create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA], m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] ); - m_cEncSAO.createEncData(getSaoCtuBoundary(), numCtuInFrame); - } m_cLoopFilter.create( m_maxTotalCUDepth ); - if ( !m_bLoopFilterDisable ) - { - m_cLoopFilter.initEncPicYuvBuffer( m_chromaFormatIDC, getSourceWidth(), getSourceHeight() ); - } - if( m_alf ) - { - m_cEncALF.create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_bitDepth, m_inputBitDepth ); - } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM m_cReshaper = new EncReshape[m_numCuEncStacks]; #endif - if (m_lumaReshapeEnable) + if (m_lmcsEnabled) { -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for (int jId = 0; jId < m_numCuEncStacks; jId++) { m_cReshaper[jId].createEnc(getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]); @@ -165,7 +154,7 @@ void EncLib::destroy () // destroy processing unit classes m_cGOPEncoder. destroy(); m_cSliceEncoder. 
destroy(); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 0; jId < m_numCuEncStacks; jId++ ) { m_cCuEncoder[jId].destroy(); @@ -181,7 +170,7 @@ void EncLib::destroy () m_cEncSAO. destroy(); m_cLoopFilter. destroy(); m_cRateCtrl. destroy(); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for (int jId = 0; jId < m_numCuEncStacks; jId++) { m_cReshaper[jId]. destroy(); @@ -189,7 +178,7 @@ void EncLib::destroy () #else m_cReshaper. destroy(); #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 0; jId < m_numCuEncStacks; jId++ ) { m_cInterSearch[jId]. destroy(); @@ -200,7 +189,7 @@ void EncLib::destroy () m_cIntraSearch. destroy(); #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM delete[] m_cCuEncoder; delete[] m_cInterSearch; delete[] m_cIntraSearch; @@ -210,11 +199,6 @@ void EncLib::destroy () delete[] m_CtxCache; #endif - - - - // destroy ROM - destroyROM(); return; } @@ -223,14 +207,18 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) m_AUWriterIf = auWriterIf; SPS &sps0=*(m_spsMap.allocatePS(0)); // NOTE: implementations that use more than 1 SPS need to be aware of activation issues. - PPS &pps0=*(m_ppsMap.allocatePS(0)); - APS &aps0=*(m_apsMap.allocatePS(0)); + PPS &pps0 = *( m_ppsMap.allocatePS( m_layerId ) ); + APS &aps0 = *( m_apsMap.allocatePS( SCALING_LIST_APS ) ); + aps0.setAPSId( 0 ); + aps0.setAPSType( SCALING_LIST_APS ); // initialize SPS - xInitSPS(sps0); -#if HEVC_VPS + xInitSPS( sps0, m_cVPS ); xInitVPS(m_cVPS, sps0); -#endif + + int dpsId = getDecodingParameterSetEnabled() ? 
1 : 0; + xInitDPS(m_dps, sps0, dpsId); + sps0.setDecodingParameterSetId(m_dps.getDecodingParameterSetId()); #if ENABLE_SPLIT_PARALLELISM if( omp_get_dynamic() ) @@ -240,7 +228,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) omp_set_nested( true ); #endif - if (getUseCompositeRef()) + if (getUseCompositeRef() || getDependentRAPIndicationSEIEnabled()) { sps0.setLongTermRefsPresent(true); } @@ -248,10 +236,10 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) #if U0132_TARGET_BITS_SATURATION if (m_RCCpbSaturationEnabled) { - m_cRateCtrl.initHrdParam(sps0.getVuiParameters()->getHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness); + m_cRateCtrl.initHrdParam(sps0.getHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness); } #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 0; jId < m_numCuEncStacks; jId++ ) { m_cRdCost[jId].setCostMode ( m_costMode ); @@ -261,10 +249,50 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) #endif // initialize PPS + pps0.setPicWidthInLumaSamples( m_iSourceWidth ); + pps0.setPicHeightInLumaSamples( m_iSourceHeight ); + pps0.setConformanceWindow( m_conformanceWindow ); xInitPPS(pps0, sps0); // initialize APS - xInitAPS(aps0); - xInitRPS(sps0, isFieldCoding); + xInitRPL(sps0, isFieldCoding); + + if( m_rprEnabled ) + { + PPS &pps = *( m_ppsMap.allocatePS( ENC_PPS_ID_RPR ) ); + Window& inputScalingWindow = pps0.getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int scaledWidth = int( ( pps0.getPicWidthInLumaSamples() - SPS::getWinUnitX( sps0.getChromaFormatIdc() ) * ( inputScalingWindow.getWindowLeftOffset() + inputScalingWindow.getWindowRightOffset() ) ) / m_scalingRatioHor ); +#else + int scaledWidth = int( ( pps0.getPicWidthInLumaSamples() - inputScalingWindow.getWindowLeftOffset() - inputScalingWindow.getWindowRightOffset() ) / m_scalingRatioHor ); +#endif + int minSizeUnit = std::max(8, (int)(sps0.getMaxCUHeight() >> 
(sps0.getMaxCodingDepth() - 1))); + int temp = scaledWidth / minSizeUnit; + int width = ( scaledWidth - ( temp * minSizeUnit) > 0 ? temp + 1 : temp ) * minSizeUnit; + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int scaledHeight = int( ( pps0.getPicHeightInLumaSamples() - SPS::getWinUnitY( sps0.getChromaFormatIdc() ) * ( inputScalingWindow.getWindowTopOffset() + inputScalingWindow.getWindowBottomOffset() ) ) / m_scalingRatioVer ); +#else + int scaledHeight = int( ( pps0.getPicHeightInLumaSamples() - inputScalingWindow.getWindowTopOffset() - inputScalingWindow.getWindowBottomOffset() ) / m_scalingRatioVer ); +#endif + temp = scaledHeight / minSizeUnit; + int height = ( scaledHeight - ( temp * minSizeUnit) > 0 ? temp + 1 : temp ) * minSizeUnit; + + pps.setPicWidthInLumaSamples( width ); + pps.setPicHeightInLumaSamples( height ); + + Window conformanceWindow; + conformanceWindow.setWindow( 0, ( width - scaledWidth ) / SPS::getWinUnitX( sps0.getChromaFormatIdc() ), 0, ( height - scaledHeight ) / SPS::getWinUnitY( sps0.getChromaFormatIdc() ) ); + pps.setConformanceWindow( conformanceWindow ); + + Window scalingWindow; + scalingWindow.setWindow( 0, width - scaledWidth, 0, height - scaledHeight ); + pps.setScalingWindow( scalingWindow ); + + // disable picture partitioning for scaled RPR pictures (slice/tile config only provided for the original resolution) + m_noPicPartitionFlag = true; + + xInitPPS( pps, sps0 ); // will allocate memory for and initialize pps.pcv inside + } #if ER_CHROMA_QP_WCG_PPS if (m_wcgChromaQpControl.isEnabled()) @@ -279,11 +307,12 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) xInitPPS(pps2, sps0); xInitPPSforLT(pps2); } + xInitPicHeader(m_picHeader, sps0, pps0); // initialize processing unit classes m_cGOPEncoder. 
init( this ); m_cSliceEncoder.init( this, sps0 ); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 0; jId < m_numCuEncStacks; jId++ ) { // precache a few objects @@ -297,19 +326,14 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) // initialize transform & quantization class m_cTrQuant[jId].init( jId == 0 ? nullptr : m_cTrQuant[0].getQuant(), -#if MAX_TB_SIZE_SIGNALLING 1 << m_log2MaxTbSize, -#else - MAX_TB_SIZEY, -#endif m_useRDOQ, m_useRDOQTS, #if T0196_SELECTIVE_RDOQ m_useSelectiveRDOQ, #endif - true, - m_useTransformSkipFast + true ); // initialize encoder search class @@ -320,6 +344,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) cabacEstimator, getCtxCache( jId ), m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth , &m_cReshaper[jId] + , sps0.getBitDepth(CHANNEL_TYPE_LUMA) ); m_cInterSearch[jId].init( this, &m_cTrQuant[jId], @@ -339,18 +364,13 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) // initialize transform & quantization class m_cTrQuant.init( nullptr, -#if MAX_TB_SIZE_SIGNALLING 1 << m_log2MaxTbSize, -#else - MAX_TB_SIZEY, -#endif m_useRDOQ, m_useRDOQTS, #if T0196_SELECTIVE_RDOQ m_useSelectiveRDOQ, #endif - true, - m_useTransformSkipFast + true ); // initialize encoder search class @@ -361,6 +381,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) cabacEstimator, getCtxCache(), m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth , &m_cReshaper + , sps0.getBitDepth(CHANNEL_TYPE_LUMA) ); m_cInterSearch.init( this, &m_cTrQuant, @@ -378,47 +399,45 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) m_iMaxRefPicNum = 0; -#if HEVC_USE_SCALING_LISTS #if ER_CHROMA_QP_WCG_PPS if( m_wcgChromaQpControl.isEnabled() ) { - xInitScalingLists( sps0, *m_ppsMap.getPS(1) ); - xInitScalingLists( sps0, pps0 ); + xInitScalingLists( sps0, *m_apsMap.getPS( 1 ) ); + xInitScalingLists( sps0, aps0 ); } else #endif { - xInitScalingLists( sps0, pps0 
); + xInitScalingLists( sps0, aps0 ); + } + if( m_rprEnabled ) + { + xInitScalingLists( sps0, *m_apsMap.getPS( ENC_PPS_ID_RPR ) ); } -#endif -#if ENABLE_WPP_PARALLELISM - m_entropyCodingSyncContextStateVec.resize( pps0.pcv->heightInCtus ); -#endif if (getUseCompositeRef()) { Picture *picBg = new Picture; - picBg->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false); + picBg->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId ); picBg->getRecoBuf().fill(0); - picBg->finalInit(sps0, pps0, aps0); + picBg->finalInit( &m_cVPS, sps0, pps0, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS ); picBg->allocateNewSlice(); picBg->createSpliceIdx(pps0.pcv->sizeInCtus); m_cGOPEncoder.setPicBg(picBg); Picture *picOrig = new Picture; - picOrig->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false); + picOrig->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId ); picOrig->getOrigBuf().fill(0); m_cGOPEncoder.setPicOrig(picOrig); } } -#if HEVC_USE_SCALING_LISTS -void EncLib::xInitScalingLists(SPS &sps, PPS &pps) +void EncLib::xInitScalingLists( SPS &sps, APS &aps ) { // Initialise scaling lists // The encoder will only use the SPS scaling lists. The PPS will never be marked present. 
const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE] = { - sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_LUMA), - sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_CHROMA) + sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_LUMA), + sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_CHROMA) }; Quant* quant = getTrQuant()->getQuant(); @@ -427,71 +446,60 @@ void EncLib::xInitScalingLists(SPS &sps, PPS &pps) { quant->setFlatScalingList(maxLog2TrDynamicRange, sps.getBitDepths()); quant->setUseScalingList(false); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 1; jId < m_numCuEncStacks; jId++ ) { getTrQuant( jId )->getQuant()->setFlatScalingList( maxLog2TrDynamicRange, sps.getBitDepths() ); getTrQuant( jId )->getQuant()->setUseScalingList( false ); } #endif - sps.setScalingListPresentFlag(false); - pps.setScalingListPresentFlag(false); } else if(getUseScalingListId() == SCALING_LIST_DEFAULT) { - sps.getScalingList().setDefaultScalingList (); - sps.setScalingListPresentFlag(false); - pps.setScalingListPresentFlag(false); - - quant->setScalingList(&(sps.getScalingList()), maxLog2TrDynamicRange, sps.getBitDepths()); + aps.getScalingList().setDefaultScalingList (); + quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() ); quant->setUseScalingList(true); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 1; jId < m_numCuEncStacks; jId++ ) { getTrQuant( jId )->getQuant()->setUseScalingList( true ); } + aps.getScalingList().setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks()); #endif } else if(getUseScalingListId() == SCALING_LIST_FILE_READ) { - sps.getScalingList().setDefaultScalingList (); - if(sps.getScalingList().xParseScalingList(getScalingListFileName())) + aps.getScalingList().setDefaultScalingList(); + CHECK( aps.getScalingList().xParseScalingList( getScalingListFileName() ), "Error Parsing Scaling List Input File" ); + 
aps.getScalingList().checkDcOfMatrix(); + if( aps.getScalingList().isNotDefaultScalingList() == false ) { - THROW( "parse scaling list"); + setUseScalingListId( SCALING_LIST_DEFAULT ); } - sps.getScalingList().checkDcOfMatrix(); - sps.setScalingListPresentFlag(sps.getScalingList().checkDefaultScalingList()); - pps.setScalingListPresentFlag(false); - - quant->setScalingList(&(sps.getScalingList()), maxLog2TrDynamicRange, sps.getBitDepths()); + quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() ); quant->setUseScalingList(true); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 1; jId < m_numCuEncStacks; jId++ ) { getTrQuant( jId )->getQuant()->setUseScalingList( true ); } #endif + aps.getScalingList().setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks()); } else { THROW("error : ScalingList == " << getUseScalingListId() << " not supported\n"); } - if (getUseScalingListId() != SCALING_LIST_OFF) + if( getUseScalingListId() == SCALING_LIST_FILE_READ ) { // Prepare delta's: - for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - const int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries. - - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId+=predListStep) - { - sps.getScalingList().checkPredMode( sizeId, listId ); - } + aps.getScalingList().checkPredMode(scalingListId); } } } -#endif void EncLib::xInitPPSforLT(PPS& pps) { @@ -524,48 +532,39 @@ void EncLib::deletePicBuffer() delete pcPic; pcPic = NULL; } + + m_cListPic.clear(); } -/** - - Application has picture buffer list with size of GOP + 1 - - Picture buffer list acts like as ring buffer - - End of the list has the latest picture - . 
- \param flush cause encoder to encode a partial GOP - \param pcPicYuvOrg original YUV picture - \param pcPicYuvTrueOrg - \param snrCSC - \retval rcListPicYuvRecOut list of reconstruction YUV pictures - \retval accessUnitsOut list of output access units - \retval iNumEncoded number of encoded pictures - */ -void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, - int& iNumEncoded ) +bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded ) { - if (m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false) + if( m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false ) { Picture* picCurr = NULL; - xGetNewPicBuffer(rcListPicYuvRecOut, picCurr, 2); - const PPS *pps = m_ppsMap.getPS(2); - const SPS *sps = m_spsMap.getPS(pps->getSPSId()); + xGetNewPicBuffer( rcListPicYuvRecOut, picCurr, 2 ); + const PPS *pps = m_ppsMap.getPS( 2 ); + const SPS *sps = m_spsMap.getPS( pps->getSPSId() ); - picCurr->M_BUFS(0, PIC_ORIGINAL).copyFrom(m_cGOPEncoder.getPicBg()->getRecoBuf()); - APS *aps = m_apsMap.getPS(0); - picCurr->finalInit(*sps, *pps, *aps); + picCurr->M_BUFS( 0, PIC_ORIGINAL ).copyFrom( m_cGOPEncoder.getPicBg()->getRecoBuf() ); + picCurr->finalInit( &m_cVPS, *sps, *pps, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS ); picCurr->poc = m_iPOCLast - 1; m_iPOCLast -= 2; - if (getUseAdaptiveQP()) + if( getUseAdaptiveQP() ) { - AQpPreanalyzer::preanalyze(picCurr); + AQpPreanalyzer::preanalyze( picCurr ); } - if (m_RCEnableRateControl) + if( m_RCEnableRateControl ) { - m_cRateCtrl.initRCGOP(m_iNumPicRcvd); + m_cRateCtrl.initRCGOP( 
m_iNumPicRcvd ); } - m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, - false, false, snrCSC, m_printFrameMSE, true); - m_cGOPEncoder.setEncodedLTRef(true); - if (m_RCEnableRateControl) + + m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, false, false, snrCSC, m_printFrameMSE, true, 0 ); + +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = m_cGOPEncoder.getMetricTime(); +#endif + m_cGOPEncoder.setEncodedLTRef( true ); + if( m_RCEnableRateControl ) { m_cRateCtrl.destroyRCGOP(); } @@ -573,71 +572,160 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru iNumEncoded = 0; m_iNumPicRcvd = 0; } + //PROF_ACCUM_AND_START_NEW_SET( getProfilerPic(), P_GOP_LEVEL ); - if (pcPicYuvOrg != NULL) + if( pcPicYuvOrg != NULL ) { // get original YUV Picture* pcPicCurr = NULL; + int ppsID = -1; // Use default PPS ID #if ER_CHROMA_QP_WCG_PPS - int ppsID=-1; // Use default PPS ID - if (getWCGChromaQPControl().isEnabled()) + if( getWCGChromaQPControl().isEnabled() ) { - ppsID = getdQPs()[m_iPOCLast / (m_compositeRefEnabled ? 2 : 1) + 1]; - ppsID+=(getSwitchPOC() != -1 && (m_iPOCLast+1 >= getSwitchPOC())?1:0); + ppsID = getdQPs()[m_iPOCLast / ( m_compositeRefEnabled ? 2 : 1 ) + 1]; + ppsID += ( getSwitchPOC() != -1 && ( m_iPOCLast + 1 >= getSwitchPOC() ) ? 1 : 0 ); } - xGetNewPicBuffer( rcListPicYuvRecOut, - pcPicCurr, ppsID ); -#else - xGetNewPicBuffer( rcListPicYuvRecOut, - pcPicCurr, -1 ); // Uses default PPS ID. However, could be modified, for example, to use a PPS ID as a function of POC (m_iPOCLast+1) #endif + if( m_rprEnabled && m_uiIntraPeriod == -1 ) { - const PPS *pPPS=(ppsID<0) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS(ppsID); - const SPS *pSPS=m_spsMap.getPS(pPPS->getSPSId()); + const int poc = m_iPOCLast + ( m_compositeRefEnabled ? 
2 : 1 ); - pcPicCurr->M_BUFS( 0, PIC_ORIGINAL ).swap( *pcPicYuvOrg ); - pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL ).swap(*cPicYuvTrueOrg ); + if( poc / m_switchPocPeriod % 2 ) + { + ppsID = ENC_PPS_ID_RPR; + } + else + { + ppsID = 0; + } + } + + if( m_cVPS.getMaxLayers() > 1 ) + { + ppsID = m_layerId; + } + + xGetNewPicBuffer( rcListPicYuvRecOut, pcPicCurr, ppsID ); + + const PPS *pPPS = ( ppsID < 0 ) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS( ppsID ); + const SPS *pSPS = m_spsMap.getPS( pPPS->getSPSId() ); + + if( m_rprEnabled ) + { + pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Y ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Y ) ); + pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Cb ) ); + pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Cr ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Cr ) ); + + pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Y ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Y ) ); + pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cb ) ); + pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cr ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cr ) ); + + const ChromaFormat chromaFormatIDC = pSPS->getChromaFormatIdc(); + + const PPS *refPPS = m_ppsMap.getPS( 0 ); + const Window& curScalingWindow = pPPS->getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int curPicWidth = pPPS->getPicWidthInLumaSamples() - SPS::getWinUnitX( pSPS->getChromaFormatIdc() ) * ( curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset() ); + int curPicHeight = pPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( pSPS->getChromaFormatIdc() ) * ( curScalingWindow.getWindowTopOffset() + curScalingWindow.getWindowBottomOffset() ); +#else + int curPicWidth = pPPS->getPicWidthInLumaSamples() - curScalingWindow.getWindowLeftOffset() - curScalingWindow.getWindowRightOffset(); 
+ int curPicHeight = pPPS->getPicHeightInLumaSamples() - curScalingWindow.getWindowTopOffset() - curScalingWindow.getWindowBottomOffset(); +#endif + + const Window& refScalingWindow = refPPS->getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int refPicWidth = refPPS->getPicWidthInLumaSamples() - SPS::getWinUnitX( pSPS->getChromaFormatIdc() ) * ( refScalingWindow.getWindowLeftOffset() + refScalingWindow.getWindowRightOffset() ); + int refPicHeight = refPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( pSPS->getChromaFormatIdc() ) * ( refScalingWindow.getWindowTopOffset() + refScalingWindow.getWindowBottomOffset() ); +#else + int refPicWidth = refPPS->getPicWidthInLumaSamples() - refScalingWindow.getWindowLeftOffset() - refScalingWindow.getWindowRightOffset(); + int refPicHeight = refPPS->getPicHeightInLumaSamples() - refScalingWindow.getWindowTopOffset() - refScalingWindow.getWindowBottomOffset(); +#endif - APS *pAPS = m_apsMap.getPS(0); - pcPicCurr->finalInit(*pSPS, *pPPS, *pAPS); + int xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth; + int yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight; + std::pair<int, int> scalingRatio = std::pair<int, int>( xScale, yScale ); + + Picture::rescalePicture( scalingRatio, *pcPicYuvOrg, refPPS->getScalingWindow(), pcPicCurr->getOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true, + pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() ); + Picture::rescalePicture( scalingRatio, *cPicYuvTrueOrg, refPPS->getScalingWindow(), pcPicCurr->getTrueOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true, + pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() ); + } + else + { + pcPicCurr->M_BUFS( 0, PIC_ORIGINAL ).swap( *pcPicYuvOrg ); + pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL ).swap( *cPicYuvTrueOrg ); } + pcPicCurr->finalInit( &m_cVPS, *pSPS, *pPPS, 
&m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS ); pcPicCurr->poc = m_iPOCLast; // compute image characteristics - if ( getUseAdaptiveQP() ) + if( getUseAdaptiveQP() ) { AQpPreanalyzer::preanalyze( pcPicCurr ); } } - if ((m_iNumPicRcvd == 0) || (!flush && (m_iPOCLast != 0) && (m_iNumPicRcvd != m_iGOPSize) && (m_iGOPSize != 0))) + if( ( m_iNumPicRcvd == 0 ) || ( !flush && ( m_iPOCLast != 0 ) && ( m_iNumPicRcvd != m_iGOPSize ) && ( m_iGOPSize != 0 ) ) ) { iNumEncoded = 0; - return; + return true; } - if ( m_RCEnableRateControl ) + if( m_RCEnableRateControl ) { m_cRateCtrl.initRCGOP( m_iNumPicRcvd ); } + m_picIdInGOP = 0; + + return false; +} + +/** + - Application has picture buffer list with size of GOP + 1 + - Picture buffer list acts like as ring buffer + - End of the list has the latest picture + . + \param flush cause encoder to encode a partial GOP + \param pcPicYuvOrg original YUV picture + \param pcPicYuvTrueOrg + \param snrCSC + \retval rcListPicYuvRecOut list of reconstruction YUV pictures + \retval accessUnitsOut list of output access units + \retval iNumEncoded number of encoded pictures + */ + +bool EncLib::encode( const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded ) +{ // compress GOP - m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, - false, false, snrCSC, m_printFrameMSE - , false - ); + m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, + false, false, snrCSC, m_printFrameMSE, false, m_picIdInGOP ); - if ( m_RCEnableRateControl ) + m_picIdInGOP++; + + // go over all pictures in a GOP excluding the first IRAP + if( m_picIdInGOP != m_iGOPSize && m_iPOCLast ) + { + return true; + } + +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = m_cGOPEncoder.getMetricTime(); +#endif + + if( m_RCEnableRateControl ) { m_cRateCtrl.destroyRCGOP(); } - iNumEncoded = m_iNumPicRcvd; - m_iNumPicRcvd = 0; + iNumEncoded = m_iNumPicRcvd; + 
m_iNumPicRcvd = 0; m_uiNumAllPicCoded += iNumEncoded; + + return false; } /**------------------------------------------------ @@ -662,72 +750,106 @@ void separateFields(Pel* org, Pel* dstField, uint32_t stride, uint32_t width, ui } -void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, - int& iNumEncoded, bool isTff ) +bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, + int& iNumEncoded, bool isTff ) { iNumEncoded = 0; + bool keepDoing = true; - for (int fieldNum=0; fieldNum<2; fieldNum++) + for( int fieldNum = 0; fieldNum < 2; fieldNum++ ) { - if (pcPicYuvOrg) + if( pcPicYuvOrg ) { /* -- field initialization -- */ - const bool isTopField=isTff==(fieldNum==0); + const bool isTopField = isTff == ( fieldNum == 0 ); Picture *pcField; xGetNewPicBuffer( rcListPicYuvRecOut, pcField, -1 ); - for (uint32_t comp = 0; comp < ::getNumberValidComponents(pcPicYuvOrg->chromaFormat); comp++) + for( uint32_t comp = 0; comp < ::getNumberValidComponents( pcPicYuvOrg->chromaFormat ); comp++ ) { - const ComponentID compID = ComponentID(comp); + const ComponentID compID = ComponentID( comp ); { PelBuf compBuf = pcPicYuvOrg->get( compID ); separateFields( compBuf.buf, - pcField->getOrigBuf().get(compID).buf, - compBuf.stride, - compBuf.width, - compBuf.height, - isTopField); + pcField->getOrigBuf().get( compID ).buf, + compBuf.stride, + compBuf.width, + compBuf.height, + isTopField ); + // to get fields of true original buffer to avoid wrong PSNR calculation in summary + compBuf = pcPicYuvTrueOrg->get( compID ); + separateFields( compBuf.buf, + pcField->getTrueOrigBuf().get(compID).buf, + compBuf.stride, + compBuf.width, + compBuf.height, + isTopField); } } { - int ppsID=-1; // Use default PPS ID - const PPS *pPPS=(ppsID<0) ? 
m_ppsMap.getFirstPS() : m_ppsMap.getPS(ppsID); - const SPS *pSPS=m_spsMap.getPS(pPPS->getSPSId()); - - APS *pAPS = m_apsMap.getPS(0); - pcField->finalInit(*pSPS, *pPPS, *pAPS); + int ppsID = -1; // Use default PPS ID + const PPS *pPPS = ( ppsID < 0 ) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS( ppsID ); + const SPS *pSPS = m_spsMap.getPS( pPPS->getSPSId() ); + pcField->finalInit( &m_cVPS, *pSPS, *pPPS, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS ); } pcField->poc = m_iPOCLast; pcField->reconstructed = false; - pcField->setBorderExtension(false);// where is this normally? + pcField->setBorderExtension( false );// where is this normally? pcField->topField = isTopField; // interlaced requirement // compute image characteristics - if ( getUseAdaptiveQP() ) + if( getUseAdaptiveQP() ) { AQpPreanalyzer::preanalyze( pcField ); } } - if ( m_iNumPicRcvd && ((flush&&fieldNum==1) || (m_iPOCLast/2)==0 || m_iNumPicRcvd==m_iGOPSize ) ) - { - // compress GOP - m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE - , false - ); - - iNumEncoded += m_iNumPicRcvd; - m_uiNumAllPicCoded += m_iNumPicRcvd; - m_iNumPicRcvd = 0; - } } + + if( m_iNumPicRcvd && ( flush || m_iPOCLast == 1 || m_iNumPicRcvd == m_iGOPSize ) ) + { + m_picIdInGOP = 0; + m_iPOCLast -= 2; + keepDoing = false; + } + + return keepDoing; } +bool EncLib::encode( const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded, bool isTff ) +{ + iNumEncoded = 0; + + for( int fieldNum = 0; fieldNum < 2; fieldNum++ ) + { + m_iPOCLast = ( m_iNumPicRcvd == m_iGOPSize ) ? m_uiNumAllPicCoded + m_iNumPicRcvd - 1 : m_iPOCLast + 1; + + // compress GOP + m_cGOPEncoder.compressGOP( m_iPOCLast, m_iPOCLast < 2 ? 
m_iPOCLast + 1 : m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE, false, m_picIdInGOP ); +#if JVET_O0756_CALCULATE_HDRMETRICS + m_metricTime = m_cGOPEncoder.getMetricTime(); +#endif + + m_picIdInGOP++; + } + + // go over all pictures in a GOP excluding first top field and first bottom field + if( m_picIdInGOP != m_iGOPSize && m_iPOCLast > 1 ) + { + return true; + } + + iNumEncoded += m_iNumPicRcvd; + m_uiNumAllPicCoded += m_iNumPicRcvd; + m_iNumPicRcvd = 0; + + return false; +} // ==================================================================================================================== // Protected member functions @@ -742,7 +864,7 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTr */ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Picture*& rpcPic, int ppsId ) { - // rotate he output buffer + // rotate the output buffer rcListPicYuvRecOut.push_back( rcListPicYuvRecOut.front() ); rcListPicYuvRecOut.pop_front(); rpcPic=0; @@ -759,23 +881,27 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict Slice::sortPicList(m_cListPic); // use an entry in the buffered list if the maximum number that need buffering has been reached: - if (m_cListPic.size() >= (uint32_t)(m_iGOPSize + getMaxDecPicBuffering(MAX_TLAYER-1) + 2) ) + if( m_cListPic.size() >= (uint32_t)( m_iGOPSize + getMaxDecPicBuffering( MAX_TLAYER - 1 ) + 2 ) ) { - PicList::iterator iterPic = m_cListPic.begin(); + PicList::iterator iterPic = m_cListPic.begin(); int iSize = int( m_cListPic.size() ); - for ( int i = 0; i < iSize; i++ ) + for( int i = 0; i < iSize; i++ ) { rpcPic = *iterPic; - if( ! rpcPic->referenced ) + if( !rpcPic->referenced && rpcPic->layerId == m_layerId ) { break; } + else + { + rpcPic = nullptr; + } iterPic++; } // If PPS ID is the same, we will assume that it has not changed since it was last used // and return the old object. 
- if (pps.getPPSId() != rpcPic->cs->pps->getPPSId()) + if( rpcPic && pps.getPPSId() != rpcPic->cs->pps->getPPSId() ) { // the IDs differ - free up an entry in the list, and then create a new one, as with the case where the max buffering state has not been reached. rpcPic->destroy(); @@ -788,15 +914,19 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict if (rpcPic==0) { rpcPic = new Picture; - - rpcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples()), sps.getMaxCUWidth(), sps.getMaxCUWidth()+16, false ); + rpcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, false, m_layerId ); + if( m_rprEnabled ) + { + rpcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).create( sps.getChromaFormatIdc(), Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) ); + rpcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).create( sps.getChromaFormatIdc(), Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) ); + } if ( getUseAdaptiveQP() ) { - const uint32_t iMaxDQPLayer = pps.getCuQpDeltaSubdiv()/2+1; + const uint32_t iMaxDQPLayer = m_picHeader.getCuQpDeltaSubdivIntra()/2+1; rpcPic->aqlayer.resize( iMaxDQPLayer ); for (uint32_t d = 0; d < iMaxDQPLayer; d++) { - rpcPic->aqlayer[d] = new AQpLayer( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples(), sps.getMaxCUWidth()>>d, sps.getMaxCUHeight()>>d ); + rpcPic->aqlayer[d] = new AQpLayer( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples(), sps.getMaxCUWidth() >> d, sps.getMaxCUHeight() >> d ); } } @@ -812,81 +942,27 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict m_iNumPicRcvd++; } - -#if HEVC_VPS -void EncLib::xInitVPS(VPS &vps, const SPS &sps) +void EncLib::xInitVPS(VPS& vps, const SPS& sps) { // The SPS must have 
already been set up. // set the VPS profile information. - *vps.getPTL() = *sps.getPTL(); - vps.setMaxOpSets(1); - vps.getTimingInfo()->setTimingInfoPresentFlag ( false ); - vps.setNumHrdParameters( 0 ); + vps.setMaxSubLayers(sps.getMaxTLayers()); +} - vps.createHrdParamBuffer(); - for( uint32_t i = 0; i < vps.getNumHrdParameters(); i ++ ) - { - vps.setHrdOpSetIdx( 0, i ); - vps.setCprmsPresentFlag( false, i ); - // Set up HrdParameters here. - } +void EncLib::xInitDPS(DPS &dps, const SPS &sps, const int dpsId) +{ + // The SPS must have already been set up. + // set the DPS profile information. + dps.setDecodingParameterSetId(dpsId); + dps.setMaxSubLayersMinus1(sps.getMaxTLayers()-1); + std::vector<ProfileTierLevel> ptls; + ptls.resize(1); + ptls[0] = *sps.getProfileTierLevel(); + dps.setProfileTierLevel(ptls); } -#endif -void EncLib::xInitSPS(SPS &sps) +void EncLib::xInitSPS( SPS& sps, VPS& vps ) { -#if !JVET_M0101_HLS - sps.setIntraOnlyConstraintFlag(m_bIntraOnlyConstraintFlag); - sps.setMaxBitDepthConstraintIdc(m_maxBitDepthConstraintIdc); - sps.setMaxChromaFormatConstraintIdc(m_maxChromaFormatConstraintIdc); - sps.setFrameConstraintFlag(m_frameOnlyConstraintFlag); - sps.setNoQtbttDualTreeIntraConstraintFlag(m_bNoQtbttDualTreeIntraConstraintFlag); - sps.setNoSaoConstraintFlag(m_bNoSaoConstraintFlag); - sps.setNoAlfConstraintFlag(m_bNoAlfConstraintFlag); - sps.setNoPcmConstraintFlag(m_bNoPcmConstraintFlag); - sps.setNoRefWraparoundConstraintFlag(m_bNoRefWraparoundConstraintFlag); - sps.setNoTemporalMvpConstraintFlag(m_bNoTemporalMvpConstraintFlag); - sps.setNoSbtmvpConstraintFlag(m_bNoSbtmvpConstraintFlag); - sps.setNoAmvrConstraintFlag(m_bNoAmvrConstraintFlag); - sps.setNoBdofConstraintFlag(m_bNoBdofConstraintFlag); - sps.setNoCclmConstraintFlag(m_bNoCclmConstraintFlag); - sps.setNoMtsConstraintFlag(m_bNoMtsConstraintFlag); - sps.setNoAffineMotionConstraintFlag(m_bNoAffineMotionConstraintFlag); - sps.setNoGbiConstraintFlag(m_bNoGbiConstraintFlag); - 
sps.setNoMhIntraConstraintFlag(m_bNoMhIntraConstraintFlag); - sps.setNoTriangleConstraintFlag(m_bNoTriangleConstraintFlag); - sps.setNoLadfConstraintFlag(m_bNoLadfConstraintFlag); - sps.setNoCurrPicRefConstraintFlag(m_bNoCurrPicRefConstraintFlag); - sps.setNoQpDeltaConstraintFlag(m_bNoQpDeltaConstraintFlag); - sps.setNoDepQuantConstraintFlag(m_bNoDepQuantConstraintFlag); - sps.setNoSignDataHidingConstraintFlag(m_bNoSignDataHidingConstraintFlag); - - ProfileTierLevel& profileTierLevel = *sps.getPTL()->getGeneralPTL(); - profileTierLevel.setLevelIdc (m_level); - profileTierLevel.setTierFlag (m_levelTier); - profileTierLevel.setProfileIdc (m_profile); - profileTierLevel.setProfileCompatibilityFlag (m_profile, 1); - profileTierLevel.setProgressiveSourceFlag (m_progressiveSourceFlag); - profileTierLevel.setInterlacedSourceFlag (m_interlacedSourceFlag); - profileTierLevel.setNonPackedConstraintFlag (m_nonPackedConstraintFlag); - profileTierLevel.setFrameOnlyConstraintFlag (m_frameOnlyConstraintFlag); - profileTierLevel.setBitDepthConstraint (m_bitDepthConstraintValue); - profileTierLevel.setChromaFormatConstraint (m_chromaFormatConstraintValue); - profileTierLevel.setIntraConstraintFlag (m_intraConstraintFlag); - profileTierLevel.setOnePictureOnlyConstraintFlag(m_onePictureOnlyConstraintFlag); - profileTierLevel.setLowerBitRateConstraintFlag (m_lowerBitRateConstraintFlag); - - if ((m_profile == Profile::MAIN10) && (m_bitDepth[CHANNEL_TYPE_LUMA] == 8) && (m_bitDepth[CHANNEL_TYPE_CHROMA] == 8)) - { - /* The above constraint is equal to Profile::MAIN */ - profileTierLevel.setProfileCompatibilityFlag(Profile::MAIN, 1); - } - if (m_profile == Profile::MAIN) - { - /* A Profile::MAIN10 decoder can always decode Profile::MAIN */ - profileTierLevel.setProfileCompatibilityFlag( Profile::MAIN10, 1 ); - } -#else ProfileTierLevel* profileTierLevel = sps.getProfileTierLevel(); ConstraintInfo* cinfo = profileTierLevel->getConstraintInfo(); cinfo->setProgressiveSourceFlag 
(m_progressiveSourceFlag); @@ -897,38 +973,54 @@ void EncLib::xInitSPS(SPS &sps) cinfo->setMaxBitDepthConstraintIdc (m_maxBitDepthConstraintIdc); cinfo->setMaxChromaFormatConstraintIdc((ChromaFormat)m_maxChromaFormatConstraintIdc); cinfo->setNoQtbttDualTreeIntraConstraintFlag(m_bNoQtbttDualTreeIntraConstraintFlag); + cinfo->setNoPartitionConstraintsOverrideConstraintFlag(m_noPartitionConstraintsOverrideConstraintFlag); cinfo->setNoSaoConstraintFlag(m_bNoSaoConstraintFlag); cinfo->setNoAlfConstraintFlag(m_bNoAlfConstraintFlag); - cinfo->setNoPcmConstraintFlag(m_bNoPcmConstraintFlag); cinfo->setNoRefWraparoundConstraintFlag(m_bNoRefWraparoundConstraintFlag); cinfo->setNoTemporalMvpConstraintFlag(m_bNoTemporalMvpConstraintFlag); cinfo->setNoSbtmvpConstraintFlag(m_bNoSbtmvpConstraintFlag); cinfo->setNoAmvrConstraintFlag(m_bNoAmvrConstraintFlag); cinfo->setNoBdofConstraintFlag(m_bNoBdofConstraintFlag); + cinfo->setNoDmvrConstraintFlag(m_noDmvrConstraintFlag); cinfo->setNoCclmConstraintFlag(m_bNoCclmConstraintFlag); cinfo->setNoMtsConstraintFlag(m_bNoMtsConstraintFlag); + cinfo->setNoSbtConstraintFlag(m_noSbtConstraintFlag); cinfo->setNoAffineMotionConstraintFlag(m_bNoAffineMotionConstraintFlag); - cinfo->setNoGbiConstraintFlag(m_bNoGbiConstraintFlag); - cinfo->setNoMhIntraConstraintFlag(m_bNoMhIntraConstraintFlag); + cinfo->setNoBcwConstraintFlag(m_bNoBcwConstraintFlag); + cinfo->setNoIbcConstraintFlag(m_noIbcConstraintFlag); + cinfo->setNoCiipConstraintFlag(m_bNoCiipConstraintFlag); + cinfo->setNoFPelMmvdConstraintFlag(m_noFPelMmvdConstraintFlag); cinfo->setNoTriangleConstraintFlag(m_bNoTriangleConstraintFlag); cinfo->setNoLadfConstraintFlag(m_bNoLadfConstraintFlag); - cinfo->setNoCurrPicRefConstraintFlag(m_bNoCurrPicRefConstraintFlag); + cinfo->setNoTransformSkipConstraintFlag(m_noTransformSkipConstraintFlag); + cinfo->setNoBDPCMConstraintFlag(m_noBDPCMConstraintFlag); + cinfo->setNoJointCbCrConstraintFlag(m_noJointCbCrConstraintFlag); 
cinfo->setNoQpDeltaConstraintFlag(m_bNoQpDeltaConstraintFlag); cinfo->setNoDepQuantConstraintFlag(m_bNoDepQuantConstraintFlag); cinfo->setNoSignDataHidingConstraintFlag(m_bNoSignDataHidingConstraintFlag); + cinfo->setNoTrailConstraintFlag(m_noTrailConstraintFlag); + cinfo->setNoStsaConstraintFlag(m_noStsaConstraintFlag); + cinfo->setNoRaslConstraintFlag(m_noRaslConstraintFlag); + cinfo->setNoRadlConstraintFlag(m_noRadlConstraintFlag); + cinfo->setNoIdrConstraintFlag(m_noIdrConstraintFlag); + cinfo->setNoCraConstraintFlag(m_noCraConstraintFlag); + cinfo->setNoGdrConstraintFlag(m_noGdrConstraintFlag); + cinfo->setNoApsConstraintFlag(m_noApsConstraintFlag); profileTierLevel->setLevelIdc (m_level); profileTierLevel->setTierFlag (m_levelTier); profileTierLevel->setProfileIdc (m_profile); - -#endif + profileTierLevel->setNumSubProfile(m_numSubProfile); + for (int k = 0; k < m_numSubProfile; k++) + { + profileTierLevel->setSubProfileIdc(k, m_subProfile[k]); + } /* XXX: should Main be marked as compatible with still picture? 
*/ /* XXX: may be a good idea to refactor the above into a function * that chooses the actual compatibility based upon options */ - - sps.setPicWidthInLumaSamples ( m_iSourceWidth ); - sps.setPicHeightInLumaSamples ( m_iSourceHeight ); - sps.setConformanceWindow ( m_conformanceWindow ); + sps.setVPSId(m_cVPS.getVPSId()); + sps.setMaxPicWidthInLumaSamples( m_iSourceWidth ); + sps.setMaxPicHeightInLumaSamples( m_iSourceHeight ); sps.setMaxCUWidth ( m_maxCUWidth ); sps.setMaxCUHeight ( m_maxCUHeight ); sps.setMaxCodingDepth ( m_maxTotalCUDepth ); @@ -938,24 +1030,45 @@ void EncLib::xInitSPS(SPS &sps) sps.setCTUSize ( m_CTUSize ); sps.setSplitConsOverrideEnabledFlag ( m_useSplitConsOverride ); sps.setMinQTSizes ( m_uiMinQT ); - sps.setMaxBTDepth ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma ); + sps.setMaxMTTHierarchyDepth ( m_uiMaxMTTHierarchyDepth, m_uiMaxMTTHierarchyDepthI, m_uiMaxMTTHierarchyDepthIChroma ); + unsigned maxBtSize[3], maxTtSize[3]; + memcpy(maxBtSize, m_uiMinQT, sizeof(maxBtSize)); + memcpy(maxTtSize, m_uiMinQT, sizeof(maxTtSize)); + if (m_uiMaxMTTHierarchyDepth) + { + maxBtSize[1] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE_INTER); + maxTtSize[1] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE_INTER); + } + if (m_uiMaxMTTHierarchyDepthI) + { + maxBtSize[0] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE); + maxTtSize[0] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE); + } + if (m_uiMaxMTTHierarchyDepthIChroma) + { + maxBtSize[2] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE_C); + maxTtSize[2] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE_C); + } + sps.setMaxBTSize ( maxBtSize[1], maxBtSize[0], maxBtSize[2] ); + sps.setMaxTTSize ( maxTtSize[1], maxTtSize[0], maxTtSize[2] ); + sps.setIDRRefParamListPresent ( m_idrRefParamList ); sps.setUseDualITree ( m_dualITree ); + sps.setUseLFNST ( m_LFNST ); sps.setSBTMVPEnabledFlag ( m_SubPuMvpMode ); sps.setAMVREnabledFlag ( m_ImvMode != IMV_OFF ); sps.setBDOFEnabledFlag ( m_BIO ); sps.setUseAffine ( m_Affine ); 
sps.setUseAffineType ( m_AffineType ); + sps.setUsePROF ( m_PROF ); sps.setUseLMChroma ( m_LMChroma ? true : false ); - sps.setCclmCollocatedChromaFlag( m_cclmCollocatedChromaFlag ); + sps.setHorCollocatedChromaFlag( m_horCollocatedChromaFlag ); + sps.setVerCollocatedChromaFlag( m_verCollocatedChromaFlag ); sps.setUseMTS ( m_IntraMTS || m_InterMTS || m_ImplicitMTS ); sps.setUseIntraMTS ( m_IntraMTS ); sps.setUseInterMTS ( m_InterMTS ); sps.setUseSBT ( m_SBT ); - if( sps.getUseSBT() ) - { - sps.setMaxSbtSize ( m_iSourceWidth >= 1920 ? 64 : 32 ); - } - sps.setUseGBi ( m_GBi ); + sps.setUseSMVD ( m_SMVD ); + sps.setUseBcw ( m_bcw ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET sps.setLadfEnabled ( m_LadfEnabled ); if ( m_LadfEnabled ) @@ -970,17 +1083,25 @@ void EncLib::xInitSPS(SPS &sps) } #endif - sps.setUseMHIntra ( m_MHIntra ); + sps.setUseCiip ( m_ciip ); sps.setUseTriangle ( m_Triangle ); - sps.setDisFracMmvdEnabledFlag ( m_allowDisFracMMVD ); + sps.setUseMMVD ( m_MMVD ); + sps.setFpelMmvdEnabledFlag (( m_MMVD ) ? 
m_allowDisFracMMVD : false); + sps.setBdofControlPresentFlag(m_BIO); + sps.setDmvrControlPresentFlag(m_DMVR); + sps.setProfControlPresentFlag(m_PROF); sps.setAffineAmvrEnabledFlag ( m_AffineAmvr ); sps.setUseDMVR ( m_DMVR ); - + sps.setUseColorTrans(m_useColorTrans); + sps.setPLTMode ( m_PLTMode); sps.setIBCFlag ( m_IBCMode); sps.setWrapAroundEnabledFlag ( m_wrapAround ); sps.setWrapAroundOffset ( m_wrapAroundOffset ); // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here - sps.setUseReshaper ( m_lumaReshapeEnable ); + sps.setUseISP ( m_ISP ); + sps.setUseLmcs ( m_lmcsEnabled ); + sps.setUseMRL ( m_MRL ); + sps.setUseMIP ( m_MIP ); int minCUSize = sps.getMaxCUWidth() >> sps.getLog2DiffMaxMinCodingBlockSize(); int log2MinCUSize = 0; while(minCUSize > 1) @@ -990,26 +1111,30 @@ void EncLib::xInitSPS(SPS &sps) } sps.setLog2MinCodingBlockSize(log2MinCUSize); + CHECK(log2MinCUSize > std::min(6, floorLog2(sps.getMaxCUWidth())), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)"); + CHECK(m_uiMaxMTTHierarchyDepth > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_inter_slice shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); + CHECK(m_uiMaxMTTHierarchyDepthI > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_intra_slice_luma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); + CHECK(m_uiMaxMTTHierarchyDepthIChroma > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_intra_slice_chroma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)"); - sps.setPCMLog2MinSize (m_uiPCMLog2MinSize); - sps.setPCMEnabledFlag ( m_usePCM ); - sps.setPCMLog2MaxSize( m_pcmLog2MaxSize ); + sps.setTransformSkipEnabledFlag(m_useTransformSkip); + sps.setBDPCMEnabled(m_useBDPCM); sps.setSPSTemporalMVPEnabledFlag((getTMVPModeId() == 2 || 
getTMVPModeId() == 1)); -#if MAX_TB_SIZE_SIGNALLING sps.setLog2MaxTbSize ( m_log2MaxTbSize ); -#endif for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++) { sps.setBitDepth (ChannelType(channelType), m_bitDepth[channelType] ); sps.setQpBDOffset (ChannelType(channelType), (6 * (m_bitDepth[channelType] - 8))); - sps.setPCMBitDepth (ChannelType(channelType), m_PCMBitDepth[channelType] ); + sps.setMinQpPrimeTsMinus4(ChannelType(channelType), (6 * (m_bitDepth[channelType] - m_inputBitDepth[channelType]))); } - sps.setSAOEnabledFlag( m_bUseSAO ); + sps.setUseWP( m_useWeightedPred ); + sps.setUseWPBiPred( m_useWeightedBiPred ); + sps.setSAOEnabledFlag( m_bUseSAO ); + sps.setJointCbCrEnabledFlag( m_JointCbCrMode ); sps.setMaxTLayers( m_maxTempLayer ); sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? true : false ); @@ -1019,13 +1144,7 @@ void EncLib::xInitSPS(SPS &sps) sps.setNumReorderPics(m_numReorderPics[i], i); } - sps.setPCMFilterDisableFlag ( m_bPCMFilterDisableFlag ); -#if HEVC_USE_SCALING_LISTS sps.setScalingListFlag ( (m_useScalingListId == SCALING_LIST_OFF) ? 
0 : 1 ); -#endif -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - sps.setUseStrongIntraSmoothing( m_useStrongIntraSmoothing ); -#endif sps.setALFEnabledFlag( m_alf ); sps.setVuiParametersPresentFlag(getVuiParametersPresentFlag()); @@ -1033,38 +1152,22 @@ void EncLib::xInitSPS(SPS &sps) { VUI* pcVUI = sps.getVuiParameters(); pcVUI->setAspectRatioInfoPresentFlag(getAspectRatioInfoPresentFlag()); + pcVUI->setAspectRatioConstantFlag(!getSampleAspectRatioInfoSEIEnabled()); pcVUI->setAspectRatioIdc(getAspectRatioIdc()); pcVUI->setSarWidth(getSarWidth()); pcVUI->setSarHeight(getSarHeight()); - pcVUI->setOverscanInfoPresentFlag(getOverscanInfoPresentFlag()); - pcVUI->setOverscanAppropriateFlag(getOverscanAppropriateFlag()); - pcVUI->setVideoSignalTypePresentFlag(getVideoSignalTypePresentFlag()); - pcVUI->setVideoFormat(getVideoFormat()); - pcVUI->setVideoFullRangeFlag(getVideoFullRangeFlag()); pcVUI->setColourDescriptionPresentFlag(getColourDescriptionPresentFlag()); pcVUI->setColourPrimaries(getColourPrimaries()); pcVUI->setTransferCharacteristics(getTransferCharacteristics()); pcVUI->setMatrixCoefficients(getMatrixCoefficients()); + pcVUI->setFieldSeqFlag(false); pcVUI->setChromaLocInfoPresentFlag(getChromaLocInfoPresentFlag()); pcVUI->setChromaSampleLocTypeTopField(getChromaSampleLocTypeTopField()); pcVUI->setChromaSampleLocTypeBottomField(getChromaSampleLocTypeBottomField()); - pcVUI->setNeutralChromaIndicationFlag(getNeutralChromaIndicationFlag()); - pcVUI->setDefaultDisplayWindow(getDefaultDisplayWindow()); - pcVUI->setFrameFieldInfoPresentFlag(getFrameFieldInfoPresentFlag()); - pcVUI->setFieldSeqFlag(false); - pcVUI->setHrdParametersPresentFlag(false); - pcVUI->getTimingInfo()->setPocProportionalToTimingFlag(getPocProportionalToTimingFlag()); - pcVUI->getTimingInfo()->setNumTicksPocDiffOneMinus1 (getNumTicksPocDiffOneMinus1() ); - pcVUI->setBitstreamRestrictionFlag(getBitstreamRestrictionFlag()); -#if HEVC_TILES_WPP - 
pcVUI->setTilesFixedStructureFlag(getTilesFixedStructureFlag()); -#endif - pcVUI->setMotionVectorsOverPicBoundariesFlag(getMotionVectorsOverPicBoundariesFlag()); - pcVUI->setMinSpatialSegmentationIdc(getMinSpatialSegmentationIdc()); - pcVUI->setMaxBytesPerPicDenom(getMaxBytesPerPicDenom()); - pcVUI->setMaxBitsPerMinCuDenom(getMaxBitsPerMinCuDenom()); - pcVUI->setLog2MaxMvLengthHorizontal(getLog2MaxMvLengthHorizontal()); - pcVUI->setLog2MaxMvLengthVertical(getLog2MaxMvLengthVertical()); + pcVUI->setChromaSampleLocType(getChromaSampleLocType()); + pcVUI->setOverscanInfoPresentFlag(getOverscanInfoPresentFlag()); + pcVUI->setOverscanAppropriateFlag(getOverscanAppropriateFlag()); + pcVUI->setVideoFullRangeFlag(getVideoFullRangeFlag()); } sps.setNumLongTermRefPicSPS(NUM_LONG_TERM_REF_PIC_SPS); @@ -1074,6 +1177,10 @@ void EncLib::xInitSPS(SPS &sps) sps.setLtRefPicPocLsbSps(k, 0); sps.setUsedByCurrPicLtSPSFlag(k, 0); } + int numQpTables = m_chromaQpMappingTableParams.getSameCQPTableForAllChromaFlag() ? 1 : (sps.getJointCbCrEnabledFlag() ? 
3 : 2); + m_chromaQpMappingTableParams.setNumQpTables(numQpTables); + sps.setChromaQpMappingTableFromParams(m_chromaQpMappingTableParams, sps.getQpBDOffset(CHANNEL_TYPE_CHROMA)); + sps.derivedChromaQPMappingTables(); #if U0132_TARGET_BITS_SATURATION if( getPictureTimingSEIEnabled() || getDecodingUnitInfoSEIEnabled() || getCpbSaturationEnabled() ) @@ -1085,7 +1192,7 @@ void EncLib::xInitSPS(SPS &sps) } if( getBufferingPeriodSEIEnabled() || getPictureTimingSEIEnabled() || getDecodingUnitInfoSEIEnabled() ) { - sps.getVuiParameters()->setHrdParametersPresentFlag( true ); + sps.setHrdParametersPresentFlag( true ); } // Set up SPS range extension settings @@ -1100,182 +1207,70 @@ void EncLib::xInitSPS(SPS &sps) sps.getSpsRangeExtension().setHighPrecisionOffsetsEnabledFlag(m_highPrecisionOffsetsEnabledFlag); sps.getSpsRangeExtension().setPersistentRiceAdaptationEnabledFlag(m_persistentRiceAdaptationEnabledFlag); sps.getSpsRangeExtension().setCabacBypassAlignmentEnabledFlag(m_cabacBypassAlignmentEnabledFlag); -} - -#if U0132_TARGET_BITS_SATURATION -// calculate scale value of bitrate and initial delay -int calcScale(int x) -{ - if (x==0) - { - return 0; - } - uint32_t iMask = 0xffffffff; - int ScaleValue = 32; - while ((x&iMask) != 0) + if( m_uiIntraPeriod < 0 ) { - ScaleValue--; - iMask = (iMask >> 1); + sps.setRPL1CopyFromRPL0Flag( true ); } - return ScaleValue; -} -#endif -void EncLib::xInitHrdParameters(SPS &sps) -{ - bool useSubCpbParams = (getSliceMode() > 0) || (getSliceSegmentMode() > 0); - int bitRate = getTargetBitrate(); - bool isRandomAccess = getIntraPeriod() > 0; -# if U0132_TARGET_BITS_SATURATION - int cpbSize = getCpbSize(); - CHECK(!(cpbSize!=0), "Unspecified error"); // CPB size may not be equal to zero. 
ToDo: have a better default and check for level constraints - if( !getVuiParametersPresentFlag() && !getCpbSaturationEnabled() ) -#else - if( !getVuiParametersPresentFlag() ) -#endif + sps.setSubPicPresentFlag(m_subPicPresentFlag); + if (m_subPicPresentFlag) { - return; - } - - VUI *vui = sps.getVuiParameters(); - HRD *hrd = vui->getHrdParameters(); - - TimingInfo *timingInfo = vui->getTimingInfo(); - timingInfo->setTimingInfoPresentFlag( true ); - switch( getFrameRate() ) - { - case 24: - timingInfo->setNumUnitsInTick( 1125000 ); timingInfo->setTimeScale ( 27000000 ); - break; - case 25: - timingInfo->setNumUnitsInTick( 1080000 ); timingInfo->setTimeScale ( 27000000 ); - break; - case 30: - timingInfo->setNumUnitsInTick( 900900 ); timingInfo->setTimeScale ( 27000000 ); - break; - case 50: - timingInfo->setNumUnitsInTick( 540000 ); timingInfo->setTimeScale ( 27000000 ); - break; - case 60: - timingInfo->setNumUnitsInTick( 450450 ); timingInfo->setTimeScale ( 27000000 ); - break; - default: - timingInfo->setNumUnitsInTick( 1001 ); timingInfo->setTimeScale ( 60000 ); - break; - } - - if (getTemporalSubsampleRatio()>1) - { - uint32_t temporalSubsampleRatio = getTemporalSubsampleRatio(); - if ( double(timingInfo->getNumUnitsInTick()) * temporalSubsampleRatio > std::numeric_limits<uint32_t>::max() ) + sps.setNumSubPics(m_numSubPics); + for (int i = 0; i < m_numSubPics; i++) { - timingInfo->setTimeScale( timingInfo->getTimeScale() / temporalSubsampleRatio ); + sps.setSubPicCtuTopLeftX(i, m_subPicCtuTopLeftX[i] ); + sps.setSubPicCtuTopLeftY(i, m_subPicCtuTopLeftY[i]); + sps.setSubPicWidth(i, m_subPicWidth[i]); + sps.setSubPicHeight(i, m_subPicHeight[i]); + sps.setSubPicTreatedAsPicFlag(i, m_subPicTreatedAsPicFlag[i]); + sps.setLoopFilterAcrossSubpicEnabledFlag(i, m_loopFilterAcrossSubpicEnabledFlag[i]); } - else - { - timingInfo->setNumUnitsInTick( timingInfo->getNumUnitsInTick() * temporalSubsampleRatio ); - } - } - - bool rateCnt = ( bitRate > 0 ); - 
hrd->setNalHrdParametersPresentFlag( rateCnt ); - hrd->setVclHrdParametersPresentFlag( rateCnt ); - hrd->setSubPicCpbParamsPresentFlag( useSubCpbParams ); - - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - hrd->setTickDivisorMinus2( 100 - 2 ); // - hrd->setDuCpbRemovalDelayLengthMinus1( 7 ); // 8-bit precision ( plus 1 for last DU in AU ) - hrd->setSubPicCpbParamsInPicTimingSEIFlag( true ); - hrd->setDpbOutputDelayDuLengthMinus1( 5 + 7 ); // With sub-clock tick factor of 100, at least 7 bits to have the same value as AU dpb delay } - else - { - hrd->setSubPicCpbParamsInPicTimingSEIFlag( false ); - } - -#if U0132_TARGET_BITS_SATURATION - if (calcScale(bitRate) <= 6) + sps.setSubPicIdPresentFlag(m_subPicIdPresentFlag); + if (m_subPicIdPresentFlag) { - hrd->setBitRateScale(0); - } - else + sps.setSubPicIdSignallingPresentFlag(m_subPicIdSignallingPresentFlag); + if (m_subPicIdSignallingPresentFlag) + { + sps.setSubPicIdLen(m_subPicIdLen); + for (int i = 0; i < m_numSubPics; i++) { - hrd->setBitRateScale(calcScale(bitRate) - 6); + sps.setSubPicId(i, m_subPicId[i]); + } + } } - if (calcScale(cpbSize) <= 4) + sps.setLoopFilterAcrossVirtualBoundariesDisabledFlag( m_loopFilterAcrossVirtualBoundariesDisabledFlag ); + sps.setNumVerVirtualBoundaries ( m_numVerVirtualBoundaries ); + sps.setNumHorVirtualBoundaries ( m_numHorVirtualBoundaries ); + for( unsigned int i = 0; i < m_numVerVirtualBoundaries; i++ ) { - hrd->setCpbSizeScale(0); + sps.setVirtualBoundariesPosX ( m_virtualBoundariesPosX[i], i ); } - else + for( unsigned int i = 0; i < m_numHorVirtualBoundaries; i++ ) { - hrd->setCpbSizeScale(calcScale(cpbSize) - 4); + sps.setVirtualBoundariesPosY ( m_virtualBoundariesPosY[i], i ); } -#else - hrd->setBitRateScale( 4 ); // in units of 2^( 6 + 4 ) = 1,024 bps - hrd->setCpbSizeScale( 6 ); // in units of 2^( 4 + 6 ) = 1,024 bit -#endif - hrd->setDuCpbSizeScale( 6 ); // in units of 2^( 4 + 6 ) = 1,024 bit - - hrd->setInitialCpbRemovalDelayLengthMinus1(15); // assuming 0.5 sec, 
log2( 90,000 * 0.5 ) = 16-bit - if( isRandomAccess ) - { - hrd->setCpbRemovalDelayLengthMinus1(5); // 32 = 2^5 (plus 1) - hrd->setDpbOutputDelayLengthMinus1 (5); // 32 + 3 = 2^6 - } - else + sps.setInterLayerPresentFlag( vps.getMaxLayers() > 1 && !vps.getAllIndependentLayersFlag() ); + for (unsigned int i = 0; i < vps.getMaxLayers(); ++i) { - hrd->setCpbRemovalDelayLengthMinus1(9); // max. 2^10 - hrd->setDpbOutputDelayLengthMinus1 (9); // max. 2^10 + CHECK((vps.getIndependentLayerFlag(i) == 1) && (sps.getInterLayerPresentFlag() != 0), " When vps_independent_layer_flag[GeneralLayerIdx[nuh_layer_id ]] is equal to 1, the value of inter_layer_ref_pics_present_flag shall be equal to 0."); } - // Note: parameters for all temporal layers are initialized with the same values - int i, j; - uint32_t bitrateValue, cpbSizeValue; - uint32_t duCpbSizeValue; - uint32_t duBitRateValue = 0; - - for( i = 0; i < MAX_TLAYER; i ++ ) - { - hrd->setFixedPicRateFlag( i, 1 ); - hrd->setPicDurationInTcMinus1( i, 0 ); - hrd->setLowDelayHrdFlag( i, 0 ); - hrd->setCpbCntMinus1( i, 0 ); - - //! \todo check for possible PTL violations - // BitRate[ i ] = ( bit_rate_value_minus1[ i ] + 1 ) * 2^( 6 + bit_rate_scale ) - bitrateValue = bitRate / (1 << (6 + hrd->getBitRateScale()) ); // bitRate is in bits, so it needs to be scaled down - // CpbSize[ i ] = ( cpb_size_value_minus1[ i ] + 1 ) * 2^( 4 + cpb_size_scale ) -#if U0132_TARGET_BITS_SATURATION - cpbSizeValue = cpbSize / (1 << (4 + hrd->getCpbSizeScale()) ); // using bitRate results in 1 second CPB size -#else - cpbSizeValue = bitRate / (1 << (4 + hrd->getCpbSizeScale()) ); // using bitRate results in 1 second CPB size -#endif + sps.setRprEnabledFlag( m_rprEnabled || sps.getInterLayerPresentFlag() ); +} +void EncLib::xInitHrdParameters(SPS &sps) +{ + m_encHRD.initHRDParameters((EncCfg*) this); - // DU CPB size could be smaller (i.e. 
bitrateValue / number of DUs), but we don't know - // in how many DUs the slice segment settings will result - duCpbSizeValue = bitrateValue; - duBitRateValue = cpbSizeValue; + HRDParameters *hrdParams = sps.getHrdParameters(); + *hrdParams = m_encHRD.getHRDParameters(); - for( j = 0; j < ( hrd->getCpbCntMinus1( i ) + 1 ); j ++ ) - { - hrd->setBitRateValueMinus1( i, j, 0, ( bitrateValue - 1 ) ); - hrd->setCpbSizeValueMinus1( i, j, 0, ( cpbSizeValue - 1 ) ); - hrd->setDuCpbSizeValueMinus1( i, j, 0, ( duCpbSizeValue - 1 ) ); - hrd->setDuBitRateValueMinus1( i, j, 0, ( duBitRateValue - 1 ) ); - hrd->setCbrFlag( i, j, 0, false ); - - hrd->setBitRateValueMinus1( i, j, 1, ( bitrateValue - 1) ); - hrd->setCpbSizeValueMinus1( i, j, 1, ( cpbSizeValue - 1 ) ); - hrd->setDuCpbSizeValueMinus1( i, j, 1, ( duCpbSizeValue - 1 ) ); - hrd->setDuBitRateValueMinus1( i, j, 1, ( duBitRateValue - 1 ) ); - hrd->setCbrFlag( i, j, 1, false ); - } - } + TimingInfo *timingInfo = sps.getTimingInfo(); + *timingInfo = m_encHRD.getTimingInfo(); } void EncLib::xInitPPS(PPS &pps, const SPS &sps) @@ -1283,7 +1278,22 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) // pps ID already initialised. 
pps.setSPSId(sps.getSPSId()); - pps.setConstrainedIntraPred( m_bUseConstrainedIntraPred ); + pps.setConstantSliceHeaderParamsEnabledFlag(getConstantSliceHeaderParamsEnabledFlag()); + pps.setPPSDepQuantEnabledIdc(getPPSDepQuantEnabledIdc()); + pps.setPPSRefPicListSPSIdc0(getPPSRefPicListSPSIdc0()); + pps.setPPSRefPicListSPSIdc1(getPPSRefPicListSPSIdc1()); + pps.setPPSMvdL1ZeroIdc(getPPSMvdL1ZeroIdc()); + pps.setPPSCollocatedFromL0Idc(getPPSCollocatedFromL0Idc()); + pps.setPPSSixMinusMaxNumMergeCandPlus1(getPPSSixMinusMaxNumMergeCandPlus1()); + pps.setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1()); + + pps.setNumSubPics(sps.getNumSubPics()); + pps.setSubPicIdSignallingPresentFlag(false); + pps.setSubPicIdLen(sps.getSubPicIdLen()); + for(int picIdx=0; picIdx<pps.getNumSubPics(); picIdx++) + { + pps.setSubPicId(picIdx, sps.getSubPicId(picIdx)); + } bool bUseDQP = (getCuQpDeltaSubdiv() > 0)? true : false; if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP()) @@ -1314,30 +1324,25 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) if ( m_RCEnableRateControl ) { pps.setUseDQP(true); - pps.setCuQpDeltaSubdiv( 0 ); } else if(bUseDQP) { pps.setUseDQP(true); - pps.setCuQpDeltaSubdiv( m_cuQpDeltaSubdiv ); } else { pps.setUseDQP(false); - pps.setCuQpDeltaSubdiv( 0 ); } if ( m_cuChromaQpOffsetSubdiv >= 0 ) { - pps.getPpsRangeExtension().setCuChromaQpOffsetSubdiv(m_cuChromaQpOffsetSubdiv); - pps.getPpsRangeExtension().clearChromaQpOffsetList(); - pps.getPpsRangeExtension().setChromaQpOffsetListEntry(1, 6, 6); + pps.clearChromaQpOffsetList(); + pps.setChromaQpOffsetListEntry(1, 6, 6, 6); /* todo, insert table entries from command line (NB, 0 should not be touched) */ } else { - pps.getPpsRangeExtension().setCuChromaQpOffsetSubdiv(0); - pps.getPpsRangeExtension().clearChromaQpOffsetList(); + pps.clearChromaQpOffsetList(); } pps.getPpsRangeExtension().setCrossComponentPredictionEnabledFlag(m_crossComponentPredictionEnabledFlag); 
pps.getPpsRangeExtension().setLog2SaoOffsetScale(CHANNEL_TYPE_LUMA, m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA ]); @@ -1359,6 +1364,15 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) pps.setPicInitQPMinus26( std::min( maxDQP, std::max( minDQP, baseQp ) )); } + if (sps.getJointCbCrEnabledFlag() == false || getChromaFormatIdc() == CHROMA_400) + { + pps.setJointCbCrQpOffsetPresentFlag(false); + } + else + { + pps.setJointCbCrQpOffsetPresentFlag(true); + } + #if ER_CHROMA_QP_WCG_PPS if (getWCGChromaQPControl().isEnabled()) { @@ -1370,12 +1384,20 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) const int crQP =(int)(dcrQP + ( dcrQP < 0 ? -0.5 : 0.5) ); pps.setQpOffset(COMPONENT_Cb, Clip3( -12, 12, min(0, cbQP) + m_chromaCbQpOffset )); pps.setQpOffset(COMPONENT_Cr, Clip3( -12, 12, min(0, crQP) + m_chromaCrQpOffset)); + if(pps.getJointCbCrQpOffsetPresentFlag()) + pps.setQpOffset(JOINT_CbCr, Clip3(-12, 12, (min(0, cbQP) + min(0, crQP)) / 2 + m_chromaCbCrQpOffset)); + else + pps.setQpOffset(JOINT_CbCr, 0); } else { #endif pps.setQpOffset(COMPONENT_Cb, m_chromaCbQpOffset ); pps.setQpOffset(COMPONENT_Cr, m_chromaCrQpOffset ); + if (pps.getJointCbCrQpOffsetPresentFlag()) + pps.setQpOffset(JOINT_CbCr, m_chromaCbCrQpOffset); + else + pps.setQpOffset(JOINT_CbCr, 0); #if ER_CHROMA_QP_WCG_PPS } #endif @@ -1407,13 +1429,53 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) !pps.getSliceChromaQpFlag() && sps.getUseDualITree() && (getChromaFormatIdc() != CHROMA_400)) { - pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0); + pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0 || m_chromaCbCrQpOffsetDualTree != 0); } -#if HEVC_TILES_WPP pps.setEntropyCodingSyncEnabledFlag( m_entropyCodingSyncEnabledFlag ); - pps.setTilesEnabledFlag( (m_iNumColumnsMinus1 > 0 || m_iNumRowsMinus1 > 0) ); -#endif + + pps.setNoPicPartitionFlag( m_noPicPartitionFlag ); + if( m_noPicPartitionFlag == false ) + { + 
pps.setLog2CtuSize( ceilLog2( sps.getCTUSize()) ); + pps.setNumExpTileColumns( (uint32_t) m_tileColumnWidth.size() ); + pps.setNumExpTileRows( (uint32_t) m_tileRowHeight.size() ); + pps.setTileColumnWidths( m_tileColumnWidth ); + pps.setTileRowHeights( m_tileRowHeight ); + pps.initTiles(); + pps.setRectSliceFlag( m_rectSliceFlag ); + if( m_rectSliceFlag ) + { + pps.setNumSlicesInPic( m_numSlicesInPic ); + pps.setTileIdxDeltaPresentFlag( m_tileIdxDeltaPresentFlag ); + pps.setRectSlices( m_rectSlices ); + pps.initRectSliceMap( ); + } + else + { + pps.initRasterSliceMap( m_rasterSliceSize ); + } + pps.setLoopFilterAcrossTilesEnabledFlag( m_bLFCrossTileBoundaryFlag ); + pps.setLoopFilterAcrossSlicesEnabledFlag( m_bLFCrossSliceBoundaryFlag ); + } + else + { + pps.setLog2CtuSize( ceilLog2( sps.getCTUSize()) ); + pps.setNumExpTileColumns(1); + pps.setNumExpTileRows(1); + pps.addTileColumnWidth( pps.getPicWidthInCtu( ) ); + pps.addTileRowHeight( pps.getPicHeightInCtu( ) ); + pps.initTiles(); + pps.setRectSliceFlag( 1 ); + pps.setNumSlicesInPic( 1 ); + pps.initRectSlices( ); + pps.setTileIdxDeltaPresentFlag( 0 ); + pps.setSliceTileIdx( 0, 0 ); + pps.initRectSliceMap( ); + pps.setLoopFilterAcrossTilesEnabledFlag( true ); + pps.setLoopFilterAcrossSlicesEnabledFlag( true ); + } + pps.setUseWP( m_useWeightedPred ); pps.setWPBiPred( m_useWeightedBiPred ); pps.setOutputFlagPresentFlag( false ); @@ -1448,7 +1510,6 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) pps.setDeblockingFilterControlPresentFlag(deblockingFilterControlPresentFlag); - pps.setLog2ParallelMergeLevelMinus2 (m_log2ParallelMergeLevelMinus2 ); pps.setCabacInitPresentFlag(CABAC_INIT_PRESENT_FLAG); pps.setLoopFilterAcrossSlicesEnabledFlag( m_bLFCrossSliceBoundaryFlag ); @@ -1460,8 +1521,8 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) } for( int i = 0; i < getGOPSize(); i++) { - CHECK(!(getGOPEntry(i).m_numRefPicsActive >= 0 && getGOPEntry(i).m_numRefPicsActive <= MAX_NUM_REF), "Unspecified error"); - 
histogram[getGOPEntry(i).m_numRefPicsActive]++; + CHECK(!(getRPLEntry(0, i).m_numRefPicsActive >= 0 && getRPLEntry(0, i).m_numRefPicsActive <= MAX_NUM_REF), "Unspecified error"); + histogram[getRPLEntry(0, i).m_numRefPicsActive]++; } int maxHist=-1; @@ -1477,400 +1538,362 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) CHECK(!(bestPos <= 15), "Unspecified error"); pps.setNumRefIdxL0DefaultActive(bestPos); pps.setNumRefIdxL1DefaultActive(bestPos); - pps.setTransquantBypassEnabledFlag(getTransquantBypassEnabledFlag()); - pps.setUseTransformSkip( m_useTransformSkip ); - pps.getPpsRangeExtension().setLog2MaxTransformSkipBlockSize( m_log2MaxTransformSkipBlockSize ); + pps.setLog2MaxTransformSkipBlockSize(m_log2MaxTransformSkipBlockSize); + pps.setPictureHeaderExtensionPresentFlag(false); + + pps.pcv = new PreCalcValues( sps, pps, true ); + pps.setRpl1IdxPresentFlag(sps.getRPL1IdxPresentFlag()); +} -#if HEVC_DEPENDENT_SLICES - if (m_sliceSegmentMode != NO_SLICES) +void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps) +{ + int i; + picHeader.initPicHeader(); + + // parameter sets + picHeader.setSPSId( sps.getSPSId() ); + picHeader.setPPSId( pps.getPPSId() ); + + // merge list sizes + picHeader.setMaxNumMergeCand ( getMaxNumMergeCand() ); + picHeader.setMaxNumAffineMergeCand( getMaxNumAffineMergeCand() ); + picHeader.setMaxNumTriangleCand ( getMaxNumTriangleCand() ); + picHeader.setMaxNumIBCMergeCand ( getMaxNumIBCMergeCand() ); + + // copy partitioning constraints from SPS + picHeader.setSplitConsOverrideFlag(false); + picHeader.setMinQTSizes( sps.getMinQTSizes() ); + picHeader.setMaxMTTHierarchyDepths( sps.getMaxMTTHierarchyDepths() ); + picHeader.setMaxBTSizes( sps.getMaxBTSizes() ); + picHeader.setMaxTTSizes( sps.getMaxTTSizes() ); + + // quantization + picHeader.setDepQuantEnabledFlag( getDepQuantEnabledFlag() ); + picHeader.setSignDataHidingEnabledFlag( getSignDataHidingEnabledFlag() ); + + bool bUseDQP = (getCuQpDeltaSubdiv() > 0)? 
true : false; + + if( (getMaxDeltaQP() != 0 )|| getUseAdaptiveQP() ) { - pps.setDependentSliceSegmentsEnabledFlag( true ); + bUseDQP = true; } -#endif -#if HEVC_TILES_WPP - xInitPPSforTiles(pps); +#if SHARP_LUMA_DELTA_QP + if( getLumaLevelToDeltaQPMapping().isEnabled() ) + { + bUseDQP = true; + } +#endif +#if ENABLE_QPA + if( getUsePerceptQPA() && !bUseDQP ) + { + CHECK( m_cuQpDeltaSubdiv != 0, "max. delta-QP subdiv must be zero!" ); + bUseDQP = (getBaseQP() < 38) && (getSourceWidth() > 512 || getSourceHeight() > 320); + } #endif - pps.pcv = new PreCalcValues( sps, pps, true ); + if( m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING ) + { + bUseDQP=false; + } + + if( m_RCEnableRateControl ) + { + picHeader.setCuQpDeltaSubdivIntra( 0 ); + picHeader.setCuQpDeltaSubdivInter( 0 ); + } + else if( bUseDQP ) + { + picHeader.setCuQpDeltaSubdivIntra( m_cuQpDeltaSubdiv ); + picHeader.setCuQpDeltaSubdivInter( m_cuQpDeltaSubdiv ); + } + else + { + picHeader.setCuQpDeltaSubdivIntra( 0 ); + picHeader.setCuQpDeltaSubdivInter( 0 ); + } + + if( m_cuChromaQpOffsetSubdiv >= 0 ) + { + picHeader.setCuChromaQpOffsetSubdivIntra(m_cuChromaQpOffsetSubdiv); + picHeader.setCuChromaQpOffsetSubdivInter(m_cuChromaQpOffsetSubdiv); + } + else + { + picHeader.setCuChromaQpOffsetSubdivIntra(0); + picHeader.setCuChromaQpOffsetSubdivInter(0); + } + + // sub-pictures + picHeader.setSubPicIdSignallingPresentFlag(sps.getSubPicIdSignallingPresentFlag()); + picHeader.setSubPicIdLen(sps.getSubPicIdLen()); + for(i=0; i<sps.getNumSubPics(); i++) { + picHeader.setSubPicId(i, sps.getSubPicId(i)); + } + + // virtual boundaries + picHeader.setLoopFilterAcrossVirtualBoundariesDisabledFlag(sps.getLoopFilterAcrossVirtualBoundariesDisabledFlag()); + picHeader.setNumVerVirtualBoundaries(sps.getNumVerVirtualBoundaries()); + picHeader.setNumHorVirtualBoundaries(sps.getNumHorVirtualBoundaries()); + for(i=0; i<3; i++) { + picHeader.setVirtualBoundariesPosX(sps.getVirtualBoundariesPosX(i), i); + 
picHeader.setVirtualBoundariesPosY(sps.getVirtualBoundariesPosY(i), i); + } + + // gradual decoder refresh flag + picHeader.setGdrPicFlag(false); + + // BDOF / DMVR / PROF + picHeader.setDisBdofFlag(false); + picHeader.setDisDmvrFlag(false); + picHeader.setDisProfFlag(false); } void EncLib::xInitAPS(APS &aps) { //Do nothing now } -//Function for initializing m_RPSList, a list of ReferencePictureSet, based on the GOPEntry objects read from the config file. -void EncLib::xInitRPS(SPS &sps, bool isFieldCoding) + +void EncLib::xInitRPL(SPS &sps, bool isFieldCoding) { - ReferencePictureSet* rps; + ReferencePictureList* rpl; - sps.createRPSList(getGOPSize() + m_extraRPSs + 1); - RPSList* rpsList = sps.getRPSList(); + int numRPLCandidates = getRPLCandidateSize(0); + // To allocate one additional memory for RPL of POC1 (first bottom field) which is not specified in cfg file + sps.createRPLList0(numRPLCandidates + (isFieldCoding ? 1 : 0)); + sps.createRPLList1(numRPLCandidates + (isFieldCoding ? 1 : 0)); + RPLList* rplList = 0; - for( int i = 0; i < getGOPSize()+m_extraRPSs; i++) + for (int i = 0; i < 2; i++) { - const GOPEntry &ge = getGOPEntry(i); - rps = rpsList->getReferencePictureSet(i); - rps->setNumberOfPictures(ge.m_numRefPics); - rps->setNumRefIdc(ge.m_numRefIdc); - int numNeg = 0; - int numPos = 0; - for( int j = 0; j < ge.m_numRefPics; j++) + rplList = (i == 0) ? sps.getRPLList0() : sps.getRPLList1(); + for (int j = 0; j < numRPLCandidates; j++) { - rps->setDeltaPOC(j,ge.m_referencePics[j]); - rps->setUsed(j,ge.m_usedByCurrPic[j]); - if(ge.m_referencePics[j]>0) - { - numPos++; - } - else + const RPLEntry &ge = getRPLEntry(i, j); + rpl = rplList->getReferencePictureList(j); + rpl->setNumberOfShorttermPictures(ge.m_numRefPics); + rpl->setNumberOfLongtermPictures(0); //Hardcoded as 0 for now. 
need to update this when implementing LTRP + rpl->setNumberOfActivePictures(ge.m_numRefPicsActive); + rpl->setLtrpInSliceHeaderFlag(ge.m_ltrp_in_slice_header_flag); + rpl->setInterLayerPresentFlag( sps.getInterLayerPresentFlag() ); + // inter-layer reference picture is not signaled in SPS RPL, SPS is shared currently + rpl->setNumberOfInterLayerPictures( 0 ); + + for (int k = 0; k < ge.m_numRefPics; k++) { - numNeg++; - } - } - rps->setNumberOfNegativePictures(numNeg); - rps->setNumberOfPositivePictures(numPos); - - // handle inter RPS intialization from the config file. - rps->setInterRPSPrediction(ge.m_interRPSPrediction > 0); // not very clean, converting anything > 0 to true. - rps->setDeltaRIdxMinus1(0); // index to the Reference RPS is always the previous one. - ReferencePictureSet* RPSRef = i>0 ? rpsList->getReferencePictureSet(i-1): NULL; // get the reference RPS - - if (ge.m_interRPSPrediction == 2) // Automatic generation of the inter RPS idc based on the RIdx provided. - { - CHECK(!(RPSRef!=NULL), "Unspecified error"); - int deltaRPS = getGOPEntry(i-1).m_POC - ge.m_POC; // the ref POC - current POC - int numRefDeltaPOC = RPSRef->getNumberOfPictures(); - - rps->setDeltaRPS(deltaRPS); // set delta RPS - rps->setNumRefIdc(numRefDeltaPOC+1); // set the numRefIdc to the number of pictures in the reference RPS + 1. - int count=0; - for (int j = 0; j <= numRefDeltaPOC; j++ ) // cycle through pics in reference RPS. - { - int RefDeltaPOC = (j<numRefDeltaPOC)? RPSRef->getDeltaPOC(j): 0; // if it is the last decoded picture, set RefDeltaPOC = 0 - rps->setRefIdc(j, 0); - for (int k = 0; k < rps->getNumberOfPictures(); k++ ) // cycle through pics in current RPS. - { - if (rps->getDeltaPOC(k) == ( RefDeltaPOC + deltaRPS)) // if the current RPS has a same picture as the reference RPS. 
- { - rps->setRefIdc(j, (rps->getUsed(k)?1:2)); - count++; - break; - } - } - } - if (count != rps->getNumberOfPictures()) - { - msg( WARNING, "Warning: Unable fully predict all delta POCs using the reference RPS index given in the config file. Setting Inter RPS to false for this RPS.\n"); - rps->setInterRPSPrediction(0); - } - } - else if (ge.m_interRPSPrediction == 1) // inter RPS idc based on the RefIdc values provided in config file. - { - CHECK(!(RPSRef!=NULL), "Unspecified error"); - rps->setDeltaRPS(ge.m_deltaRPS); - rps->setNumRefIdc(ge.m_numRefIdc); - for (int j = 0; j < ge.m_numRefIdc; j++ ) - { - rps->setRefIdc(j, ge.m_refIdc[j]); - } - // the following code overwrite the deltaPOC and Used by current values read from the config file with the ones - // computed from the RefIdc. A warning is printed if they are not identical. - numNeg = 0; - numPos = 0; - ReferencePictureSet RPSTemp; // temporary variable - - for (int j = 0; j < ge.m_numRefIdc; j++ ) - { - if (ge.m_refIdc[j]) - { - int deltaPOC = ge.m_deltaRPS + ((j < RPSRef->getNumberOfPictures())? 
RPSRef->getDeltaPOC(j) : 0); - RPSTemp.setDeltaPOC((numNeg+numPos),deltaPOC); - RPSTemp.setUsed((numNeg+numPos),ge.m_refIdc[j]==1?1:0); - if (deltaPOC<0) - { - numNeg++; - } - else - { - numPos++; - } - } - } - if (numNeg != rps->getNumberOfNegativePictures()) - { - msg( WARNING, "Warning: number of negative pictures in RPS is different between intra and inter RPS specified in the config file.\n"); - rps->setNumberOfNegativePictures(numNeg); - rps->setNumberOfPictures(numNeg+numPos); - } - if (numPos != rps->getNumberOfPositivePictures()) - { - msg( WARNING, "Warning: number of positive pictures in RPS is different between intra and inter RPS specified in the config file.\n"); - rps->setNumberOfPositivePictures(numPos); - rps->setNumberOfPictures(numNeg+numPos); - } - RPSTemp.setNumberOfPictures(numNeg+numPos); - RPSTemp.setNumberOfNegativePictures(numNeg); - RPSTemp.sortDeltaPOC(); // sort the created delta POC before comparing - // check if Delta POC and Used are the same - // print warning if they are not. - for (int j = 0; j < ge.m_numRefIdc; j++ ) - { - if (RPSTemp.getDeltaPOC(j) != rps->getDeltaPOC(j)) - { - msg( WARNING, "Warning: delta POC is different between intra RPS and inter RPS specified in the config file.\n"); - rps->setDeltaPOC(j,RPSTemp.getDeltaPOC(j)); - } - if (RPSTemp.getUsed(j) != rps->getUsed(j)) - { - msg( WARNING, "Warning: Used by Current in RPS is different between intra and inter RPS specified in the config file.\n"); - rps->setUsed(j,RPSTemp.getUsed(j)); - } + rpl->setRefPicIdentifier( k, ge.m_deltaRefPics[k], 0, false, 0 ); } } } - //In case of field coding, we need to set special parameters for the first bottom field of the sequence, since it is not specified in the cfg file. - //The position = GOPSize + extraRPSs which is (a priori) unused is reserved for this field in the RPS. 
+ if (isFieldCoding) { - rps = rpsList->getReferencePictureSet(getGOPSize()+m_extraRPSs); - rps->setNumberOfPictures(1); - rps->setNumberOfNegativePictures(1); - rps->setNumberOfPositivePictures(0); - rps->setNumberOfLongtermPictures(0); - rps->setDeltaPOC(0,-1); - rps->setPOC(0,0); - rps->setUsed(0,true); - rps->setInterRPSPrediction(false); - rps->setDeltaRIdxMinus1(0); - rps->setDeltaRPS(0); - rps->setNumRefIdc(0); + // To set RPL of POC1 (first bottom field) which is not specified in cfg file + for (int i = 0; i < 2; i++) + { + rplList = (i == 0) ? sps.getRPLList0() : sps.getRPLList1(); + rpl = rplList->getReferencePictureList(numRPLCandidates); + rpl->setNumberOfShorttermPictures(1); + rpl->setNumberOfLongtermPictures(0); + rpl->setNumberOfActivePictures(1); + rpl->setLtrpInSliceHeaderFlag(0); + rpl->setRefPicIdentifier(0, 1, 0, false, 0); + rpl->setPOC(0, 0); + } } -} - // This is a function that - // determines what Reference Picture Set to use - // for a specific slice (with POC = POCCurr) -void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid - , int ltPoc -) -{ - bool isEncodeLtRef = (POCCurr == ltPoc); - if (m_compositeRefEnabled && isEncodeLtRef) - { - POCCurr++; - } - int rIdx = GOPid; - slice->setRPSidx(GOPid); + //Check if all delta POC of STRP in each RPL has the same sign + //Check RPLL0 first + const RPLList* rplList0 = sps.getRPLList0(); + const RPLList* rplList1 = sps.getRPLList1(); + uint32_t numberOfRPL = sps.getNumRPL0(); - for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++) + bool isAllEntriesinRPLHasSameSignFlag = true; + bool isFirstEntry = true; + bool lastSign = true; //true = positive ; false = negative + for (uint32_t ii = 0; isAllEntriesinRPLHasSameSignFlag && ii < numberOfRPL; ii++) { - if(m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0) + const ReferencePictureList* rpl = rplList0->getReferencePictureList(ii); + for (uint32_t jj = 0; isAllEntriesinRPLHasSameSignFlag && jj < 
rpl->getNumberOfActivePictures(); jj++) { - int POCIndex = POCCurr%m_uiIntraPeriod; - if(POCIndex == 0) + if (!rpl->isRefPicLongterm(jj) && isFirstEntry) { - POCIndex = m_uiIntraPeriod; + lastSign = (rpl->getRefPicIdentifier(jj) >= 0) ? true : false; + isFirstEntry = false; } - if(POCIndex == m_GOPList[extraNum].m_POC) + else if (!rpl->isRefPicLongterm(jj) && (((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) >= 0 && lastSign == false) || ((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) < 0 && lastSign == true))) { - slice->setRPSidx(extraNum); - rIdx = extraNum; + isAllEntriesinRPLHasSameSignFlag = false; } } - else + } + //Check RPLL1. Skip it if it is already found out that this flag is not true for RPL0 or if RPL1 is the same as RPL0 + numberOfRPL = sps.getNumRPL1(); + isFirstEntry = true; + lastSign = true; + for (uint32_t ii = 0; isAllEntriesinRPLHasSameSignFlag && !sps.getRPL1CopyFromRPL0Flag() && ii < numberOfRPL; ii++) + { + isFirstEntry = true; + const ReferencePictureList* rpl = rplList1->getReferencePictureList(ii); + for (uint32_t jj = 0; isAllEntriesinRPLHasSameSignFlag && jj < rpl->getNumberOfActivePictures(); jj++) { - if(POCCurr==m_GOPList[extraNum].m_POC) + if (!rpl->isRefPicLongterm(jj) && isFirstEntry) + { + lastSign = (rpl->getRefPicIdentifier(jj) >= 0) ? 
true : false; + isFirstEntry = false; + } + else if (!rpl->isRefPicLongterm(jj) && (((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) >= 0 && lastSign == false) || ((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) < 0 && lastSign == true))) { - slice->setRPSidx(extraNum); - rIdx = extraNum; + isAllEntriesinRPLHasSameSignFlag = false; } } } + sps.setAllActiveRplEntriesHasSameSignFlag(isAllEntriesinRPLHasSameSignFlag); +} - if(POCCurr == 1 && slice->getPic()->fieldPic) +void EncLib::getActiveRefPicListNumForPOC(const SPS *sps, int POCCurr, int GOPid, uint32_t *activeL0, uint32_t *activeL1) +{ + if (m_uiIntraPeriod < 0) //Only for RA { - slice->setRPSidx(m_iGOPSize+m_extraRPSs); - rIdx = m_iGOPSize + m_extraRPSs; + *activeL0 = *activeL1 = 0; + return; } + uint32_t rpl0Idx = GOPid; + uint32_t rpl1Idx = GOPid; - ReferencePictureSet *rps = const_cast<ReferencePictureSet *>(slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx())); - if (m_compositeRefEnabled && ltPoc != -1 && !isEncodeLtRef) + int fullListNum = m_iGOPSize; + int partialListNum = getRPLCandidateSize(0) - m_iGOPSize; + int extraNum = fullListNum; + if (m_uiIntraPeriod < 0) { - if (ltPoc != -1 && rps->getNumberOfLongtermPictures() != 1 && !isEncodeLtRef) + if (POCCurr < (2 * m_iGOPSize + 2)) { - int idx = rps->getNumberOfPictures(); - int maxPicOrderCntLSB = 1 << slice->getSPS()->getBitsForPOC(); - int ltPocLsb = ltPoc % maxPicOrderCntLSB; - - rps->setNumberOfPictures(rps->getNumberOfPictures() + 1); - rps->setNumberOfLongtermPictures(1); - rps->setPOC(idx, ltPoc); - rps->setPocLSBLT(idx, ltPocLsb); - rps->setDeltaPOC(idx, -POCCurr + ltPoc); - rps->setUsed(idx, true); + rpl0Idx = POCCurr + m_iGOPSize - 1; + rpl1Idx = POCCurr + m_iGOPSize - 1; } - } - else if (m_compositeRefEnabled && isEncodeLtRef) - { - ReferencePictureSet* localRPS = slice->getLocalRPS(); - (*localRPS) = ReferencePictureSet(); - int refPics = rps->getNumberOfPictures(); - 
localRPS->setNumberOfPictures(rps->getNumberOfPictures()); - for (int i = 0; i < refPics; i++) + else { - localRPS->setDeltaPOC(i, rps->getDeltaPOC(i) + 1); - localRPS->setUsed(i, rps->getUsed(i)); + rpl0Idx = (POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1; + rpl1Idx = (POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1; } - localRPS->setNumberOfNegativePictures(rps->getNumberOfNegativePictures()); - localRPS->setNumberOfPositivePictures(rps->getNumberOfPositivePictures()); - localRPS->setInterRPSPrediction(true); - int deltaRPS = 1; - int newIdc = 0; - for (int i = 0; i < refPics; i++) - { - int deltaPOC = ((i != refPics) ? rps->getDeltaPOC(i) : 0); // check if the reference abs POC is >= 0 - int refIdc = 0; - for (int j = 0; j < localRPS->getNumberOfPictures(); j++) // loop through the pictures in the new RPS - { - if ((deltaPOC + deltaRPS) == localRPS->getDeltaPOC(j)) - { - if (localRPS->getUsed(j)) - { - refIdc = 1; - } - else - { - refIdc = 2; - } - } - } - localRPS->setRefIdc(i, refIdc); - newIdc++; - } - localRPS->setNumRefIdc(newIdc + 1); - localRPS->setRefIdc(newIdc, 0); - localRPS->setDeltaRPS(deltaRPS); - localRPS->setDeltaRIdxMinus1(slice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - 1 - rIdx); - slice->setRPS(localRPS); - slice->setRPSidx(-1); - return; + extraNum = fullListNum + partialListNum; } - slice->setRPS(rps); -} - -int EncLib::getReferencePictureSetIdxForSOP(int POCCurr, int GOPid ) -{ - int rpsIdx = GOPid; - - for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++) + for (; extraNum<fullListNum + partialListNum; extraNum++) { - if(m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0) + if (m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0) { int POCIndex = POCCurr%m_uiIntraPeriod; - if(POCIndex == 0) - { + if (POCIndex == 0) POCIndex = m_uiIntraPeriod; - } - if(POCIndex == m_GOPList[extraNum].m_POC) + if (POCIndex == m_RPLList0[extraNum].m_POC) { - rpsIdx = extraNum; + 
rpl0Idx = extraNum; + rpl1Idx = extraNum; + extraNum++; } } - else - { - if(POCCurr==m_GOPList[extraNum].m_POC) - { - rpsIdx = extraNum; - } - } - } - - return rpsIdx; -} - -#if HEVC_TILES_WPP -void EncLib::xInitPPSforTiles(PPS &pps) -{ - pps.setTileUniformSpacingFlag( m_tileUniformSpacingFlag ); - pps.setNumTileColumnsMinus1( m_iNumColumnsMinus1 ); - pps.setNumTileRowsMinus1( m_iNumRowsMinus1 ); - if( !m_tileUniformSpacingFlag ) - { - pps.setTileColumnWidth( m_tileColumnWidth ); - pps.setTileRowHeight( m_tileRowHeight ); } - pps.setLoopFilterAcrossTilesEnabledFlag( m_loopFilterAcrossTilesEnabledFlag ); - // # substreams is "per tile" when tiles are independent. + const ReferencePictureList *rpl0 = sps->getRPLList0()->getReferencePictureList(rpl0Idx); + *activeL0 = rpl0->getNumberOfActivePictures(); + const ReferencePictureList *rpl1 = sps->getRPLList1()->getReferencePictureList(rpl1Idx); + *activeL1 = rpl1->getNumberOfActivePictures(); } -#endif -void EncCfg::xCheckGSParameters() +void EncLib::selectReferencePictureList(Slice* slice, int POCCurr, int GOPid, int ltPoc) { -#if HEVC_TILES_WPP - int iWidthInCU = ( m_iSourceWidth%m_maxCUWidth ) ? m_iSourceWidth/m_maxCUWidth + 1 : m_iSourceWidth/m_maxCUWidth; - int iHeightInCU = ( m_iSourceHeight%m_maxCUHeight ) ? m_iSourceHeight/m_maxCUHeight + 1 : m_iSourceHeight/m_maxCUHeight; - uint32_t uiCummulativeColumnWidth = 0; - uint32_t uiCummulativeRowHeight = 0; - - //check the column relative parameters - if( m_iNumColumnsMinus1 >= (1<<(LOG2_MAX_NUM_COLUMNS_MINUS1+1)) ) + bool isEncodeLtRef = (POCCurr == ltPoc); + if (m_compositeRefEnabled && isEncodeLtRef) { - EXIT( "The number of columns is larger than the maximum allowed number of columns." ); + POCCurr++; } - if( m_iNumColumnsMinus1 >= iWidthInCU ) - { - EXIT( "The current picture can not have so many columns." 
); - } + slice->setRPL0idx(GOPid); + slice->setRPL1idx(GOPid); - if( m_iNumColumnsMinus1 && !m_tileUniformSpacingFlag ) + int fullListNum = m_iGOPSize; + int partialListNum = getRPLCandidateSize(0) - m_iGOPSize; + int extraNum = fullListNum; + if (m_uiIntraPeriod < 0) { - for(int i=0; i<m_iNumColumnsMinus1; i++) + if (POCCurr < (2 * m_iGOPSize + 2)) { - uiCummulativeColumnWidth += m_tileColumnWidth[i]; + slice->setRPL0idx(POCCurr + m_iGOPSize - 1); + slice->setRPL1idx(POCCurr + m_iGOPSize - 1); } - - if( uiCummulativeColumnWidth >= iWidthInCU ) + else { - EXIT( "The width of the column is too large." ); + slice->setRPL0idx((POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1); + slice->setRPL1idx((POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1); } + extraNum = fullListNum + partialListNum; } - - //check the row relative parameters - if( m_iNumRowsMinus1 >= (1<<(LOG2_MAX_NUM_ROWS_MINUS1+1)) ) - { - EXIT( "The number of rows is larger than the maximum allowed number of rows." ); - } - - if( m_iNumRowsMinus1 >= iHeightInCU ) + for (; extraNum < fullListNum + partialListNum; extraNum++) { - EXIT( "The current picture can not have so many rows." ); + if (m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0) + { + int POCIndex = POCCurr%m_uiIntraPeriod; + if (POCIndex == 0) + POCIndex = m_uiIntraPeriod; + if (POCIndex == m_RPLList0[extraNum].m_POC) + { + slice->setRPL0idx(extraNum); + slice->setRPL1idx(extraNum); + extraNum++; + } + } } - if( m_iNumRowsMinus1 && !m_tileUniformSpacingFlag ) + if (slice->getPic()->fieldPic) { - for(int i=0; i<m_iNumRowsMinus1; i++) + // To set RPL index of POC1 (first bottom field) + if (POCCurr == 1) { - uiCummulativeRowHeight += m_tileRowHeight[i]; + slice->setRPL0idx(getRPLCandidateSize(0)); + slice->setRPL1idx(getRPLCandidateSize(0)); } - - if( uiCummulativeRowHeight >= iHeightInCU ) + else if (m_uiIntraPeriod < 0) { - EXIT( "The height of the row is too large." 
); + // To set RPL indexes for LD + int numRPLCandidates = getRPLCandidateSize(0); + if (POCCurr < numRPLCandidates - m_iGOPSize + 2) + { + slice->setRPL0idx(POCCurr + m_iGOPSize - 2); + slice->setRPL1idx(POCCurr + m_iGOPSize - 2); + } + else + { + if (POCCurr%m_iGOPSize == 0) + { + slice->setRPL0idx(m_iGOPSize - 2); + slice->setRPL1idx(m_iGOPSize - 2); + } + else if (POCCurr%m_iGOPSize == 1) + { + slice->setRPL0idx(m_iGOPSize - 1); + slice->setRPL1idx(m_iGOPSize - 1); + } + else + { + slice->setRPL0idx(POCCurr % m_iGOPSize - 2); + slice->setRPL1idx(POCCurr % m_iGOPSize - 2); + } + } } } -#endif + + const ReferencePictureList *rpl0 = (slice->getSPS()->getRPLList0()->getReferencePictureList(slice->getRPL0idx())); + const ReferencePictureList *rpl1 = (slice->getSPS()->getRPLList1()->getReferencePictureList(slice->getRPL1idx())); + slice->setRPL0(rpl0); + slice->setRPL1(rpl1); } -#if JCTVC_Y0038_PARAMS + void EncLib::setParamSetChanged(int spsId, int ppsId) { m_ppsMap.setChangedFlag(ppsId); m_spsMap.setChangedFlag(spsId); } -#endif bool EncLib::APSNeedsWriting(int apsId) { bool isChanged = m_apsMap.getChangedFlag(apsId); @@ -1892,6 +1915,37 @@ bool EncLib::SPSNeedsWriting(int spsId) return bChanged; } +void EncLib::checkPltStats( Picture* pic ) +{ + int totalArea = 0; + int pltArea = 0; + for (auto apu : pic->cs->pus) + { + for (int i = 0; i < MAX_NUM_TBLOCKS; ++i) + { + int puArea = apu->blocks[i].width * apu->blocks[i].height; + if (apu->blocks[i].width > 0 && apu->blocks[i].height > 0) + { + totalArea += puArea; + if (CU::isPLT(*apu->cu) || CU::isIBC(*apu->cu)) + { + pltArea += puArea; + } + break; + } + + } + } + if (pltArea * PLT_FAST_RATIO < totalArea) + { + m_doPlt = false; + } + else + { + m_doPlt = true; + } +} + #if X0038_LAMBDA_FROM_QP_CAPABILITY int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const { @@ -1900,7 +1954,15 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const if 
(getCostMode()==COST_LOSSLESS_CODING) { +#if JVET_AHG14_LOSSLESS +#if JVET_AHG14_LOSSLESS_ENC_QP_FIX + qp = getBaseQP(); +#else + qp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ( ( pSlice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA ) - 8 ) * 6 ); +#endif +#else qp=LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP; +#endif } else { @@ -1930,14 +1992,6 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const } else { -#if SHARP_LUMA_DELTA_QP - // Only adjust QP when not lossless - if (!(( getMaxDeltaQP() == 0 ) && (!getLumaLevelToDeltaQPMapping().isEnabled()) && (qp == -lumaQpBDOffset ) && (pSlice->getPPS()->getTransquantBypassEnabledFlag()))) -#else - if (!(( getMaxDeltaQP() == 0 ) && (qp == -lumaQpBDOffset ) && (pSlice->getPPS()->getTransquantBypassEnabledFlag()))) -#endif - - { const GOPEntry &gopEntry=getGOPEntry(gopIndex); // adjust QP according to the QP offset for the GOP entry. qp +=gopEntry.m_QPOffset; @@ -1947,7 +2001,6 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const int qpOffset = (int)floor(Clip3<double>(0.0, 3.0, dqpOffset)); qp += qpOffset ; } - } #if !QP_SWITCHING_FOR_PARALLEL // modify QP if a fractional QP was originally specified, cause dQPs to be 0 or 1. @@ -1963,4 +2016,5 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const } #endif + //! \} diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h index 137b70ffac0be012c8204ba6099d8a931fe6ac97..f9b13233e597e85169e374573f23fea701a2e419 100644 --- a/source/Lib/EncoderLib/EncLib.h +++ b/source/Lib/EncoderLib/EncLib.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ #include "EncCfg.h" #include "EncGOP.h" #include "EncSlice.h" +#include "EncHRD.h" #include "VLCWriter.h" #include "CABACWriter.h" #include "InterSearch.h" @@ -57,6 +58,7 @@ #include "EncAdaptiveLoopFilter.h" #include "RateCtrl.h" +class EncLibCommon; //! \ingroup EncoderLib //! \{ @@ -72,11 +74,12 @@ private: // picture int m_iPOCLast; ///< time index (POC) int m_iNumPicRcvd; ///< number of received pictures - uint32_t m_uiNumAllPicCoded; ///< number of coded pictures - PicList m_cListPic; ///< dynamic list of pictures + uint32_t m_uiNumAllPicCoded; ///< number of coded pictures + PicList& m_cListPic; ///< dynamic list of pictures + int m_layerId; // encoder search -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM InterSearch *m_cInterSearch; ///< encoder search class IntraSearch *m_cIntraSearch; ///< encoder search class #else @@ -84,7 +87,7 @@ private: IntraSearch m_cIntraSearch; ///< encoder search class #endif // coding tool -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM TrQuant *m_cTrQuant; ///< transform & quantization class #else TrQuant m_cTrQuant; ///< transform & quantization class @@ -93,13 +96,13 @@ private: EncSampleAdaptiveOffset m_cEncSAO; ///< sample adaptive offset class EncAdaptiveLoopFilter m_cEncALF; HLSWriter m_HLSWriter; ///< CAVLC encoder -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CABACEncoder *m_CABACEncoder; #else CABACEncoder m_CABACEncoder; #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM EncReshape *m_cReshaper; ///< reshaper class #else EncReshape m_cReshaper; ///< reshaper class @@ -108,17 +111,18 @@ private: // processing unit EncGOP m_cGOPEncoder; ///< GOP encoder EncSlice m_cSliceEncoder; ///< slice encoder -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if 
ENABLE_SPLIT_PARALLELISM EncCu *m_cCuEncoder; ///< CU encoder #else EncCu m_cCuEncoder; ///< CU encoder #endif // SPS - ParameterSetMap<SPS> m_spsMap; ///< SPS. This is the base value. This is copied to PicSym - ParameterSetMap<PPS> m_ppsMap; ///< PPS. This is the base value. This is copied to PicSym - ParameterSetMap<APS> m_apsMap; ///< APS. This is the base value. This is copied to PicSym + ParameterSetMap<SPS>& m_spsMap; ///< SPS. This is the base value. This is copied to PicSym + ParameterSetMap<PPS>& m_ppsMap; ///< PPS. This is the base value. This is copied to PicSym + ParameterSetMap<APS>& m_apsMap; ///< APS. This is the base value. This is copied to PicSym + PicHeader m_picHeader; ///< picture header // RD cost computation -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM RdCost *m_cRdCost; ///< RD cost computation class CtxCache *m_CtxCache; ///< buffer for temporarily stored context models #else @@ -130,7 +134,7 @@ private: AUWriterIf* m_AUWriterIf; -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM int m_numCuEncStacks; #endif @@ -138,36 +142,43 @@ private: CacheModel m_cacheModel; #endif + APS* m_apss[ALF_CTB_MAX_NUM_APS]; + + APS* m_lmcsAPS; + APS* m_scalinglistAPS; + + EncHRD m_encHRD; + + bool m_doPlt; +#if JVET_O0756_CALCULATE_HDRMETRICS + std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime; +#endif + int m_picIdInGOP; + public: + SPS* getSPS( int spsId ) { return m_spsMap.getPS( spsId ); }; + APS** getApss() { return m_apss; } Ctx m_entropyCodingSyncContextState; ///< leave in addition to vector for compatibility -#if ENABLE_WPP_PARALLELISM - std::vector<Ctx> m_entropyCodingSyncContextStateVec; ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row -#endif protected: void xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Picture*& rpcPic, int ppsId ); ///< get picture buffer which will be processed. 
If ppsId<0, then the ppsMap will be queried for the first match. -#if HEVC_VPS - void xInitVPS (VPS &vps, const SPS &sps); ///< initialize VPS from encoder options -#endif - void xInitSPS (SPS &sps); ///< initialize SPS from encoder options + void xInitVPS(VPS& vps, const SPS& sps); ///< initialize VPS from encoder options + void xInitDPS (DPS &dps, const SPS &sps, const int dpsId); ///< initialize DPS from encoder options + void xInitSPS ( SPS& sps, VPS& vps ); ///< initialize SPS from encoder options void xInitPPS (PPS &pps, const SPS &sps); ///< initialize PPS from encoder options + void xInitPicHeader (PicHeader &picHeader, const SPS &sps, const PPS &pps); ///< initialize Picture Header from encoder options void xInitAPS (APS &aps); ///< initialize APS from encoder options -#if HEVC_USE_SCALING_LISTS - void xInitScalingLists (SPS &sps, PPS &pps); ///< initialize scaling lists -#endif + void xInitScalingLists ( SPS &sps, APS &aps ); ///< initialize scaling lists void xInitPPSforLT(PPS& pps); - void xInitHrdParameters(SPS &sps); ///< initialize HRD parameters + void xInitHrdParameters(SPS &sps); ///< initialize HRDParameters parameters -#if HEVC_TILES_WPP - void xInitPPSforTiles (PPS &pps); -#endif - void xInitRPS (SPS &sps, bool isFieldCoding); ///< initialize PPS from encoder options + void xInitRPL(SPS &sps, bool isFieldCoding); ///< initialize SPS from encoder options public: - EncLib(); + EncLib( EncLibCommon* encLibCommon ); virtual ~EncLib(); - void create (); + void create ( const int layerId ); void destroy (); void init ( bool isFieldCoding, AUWriterIf* auWriterIf ); void deletePicBuffer (); @@ -178,7 +189,7 @@ public: AUWriterIf* getAUWriterIf () { return m_AUWriterIf; } PicList* getListPic () { return &m_cListPic; } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM InterSearch* getInterSearch ( int jId = 0 ) { return &m_cInterSearch[jId]; } IntraSearch* getIntraSearch ( int jId = 0 ) { return &m_cIntraSearch[jId]; } 
@@ -194,13 +205,14 @@ public: EncAdaptiveLoopFilter* getALF () { return &m_cEncALF; } EncGOP* getGOPEncoder () { return &m_cGOPEncoder; } EncSlice* getSliceEncoder () { return &m_cSliceEncoder; } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM + EncHRD* getHRD () { return &m_encHRD; } +#if ENABLE_SPLIT_PARALLELISM EncCu* getCuEncoder ( int jId = 0 ) { return &m_cCuEncoder[jId]; } #else EncCu* getCuEncoder () { return &m_cCuEncoder; } #endif HLSWriter* getHLSWriter () { return &m_HLSWriter; } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM CABACEncoder* getCABACEncoder ( int jId = 0 ) { return &m_CABACEncoder[jId]; } RdCost* getRdCost ( int jId = 0 ) { return &m_cRdCost[jId]; } @@ -214,51 +226,63 @@ public: RateCtrl* getRateCtrl () { return &m_cRateCtrl; } - void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid - , int ltPoc - ); - int getReferencePictureSetIdxForSOP(int POCCurr, int GOPid ); + void getActiveRefPicListNumForPOC(const SPS *sps, int POCCurr, int GOPid, uint32_t *activeL0, uint32_t *activeL1); + void selectReferencePictureList(Slice* slice, int POCCurr, int GOPid, int ltPoc); -#if JCTVC_Y0038_PARAMS void setParamSetChanged(int spsId, int ppsId); -#endif bool APSNeedsWriting(int apsId); bool PPSNeedsWriting(int ppsId); bool SPSNeedsWriting(int spsId); const PPS* getPPS( int Id ) { return m_ppsMap.getPS( Id); } const APS* getAPS(int Id) { return m_apsMap.getPS(Id); } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM void setNumCuEncStacks( int n ) { m_numCuEncStacks = n; } int getNumCuEncStacks() const { return m_numCuEncStacks; } #endif -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM EncReshape* getReshaper( int jId = 0 ) { return &m_cReshaper[jId]; } #else EncReshape* getReshaper() { return &m_cReshaper; } #endif + + ParameterSetMap<APS>* getApsMap() { return &m_apsMap; } + + bool getPltEnc() const { return m_doPlt; } + 
void checkPltStats( Picture* pic ); +#if JVET_O0756_CALCULATE_HDRMETRICS + std::chrono::duration<long long, ratio<1, 1000000000>> getMetricTime() const { return m_metricTime; }; +#endif // ------------------------------------------------------------------------------------------------------------------- // encoder function // ------------------------------------------------------------------------------------------------------------------- /// encode several number of pictures until end-of-sequence - void encode( bool bEos, + bool encodePrep( bool bEos, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space. std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded ); - /// encode several number of pictures until end-of-sequence - void encode( bool bEos, + bool encode( const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space. + std::list<PelUnitBuf*>& rcListPicYuvRecOut, + int& iNumEncoded ); + + bool encodePrep( bool bEos, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space. std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded, bool isTff ); + bool encode( const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space. 
+ std::list<PelUnitBuf*>& rcListPicYuvRecOut, + int& iNumEncoded, bool isTff ); + - void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_spsMap.getFirstPS()->getBitDepths()); } + void printSummary( bool isField ) { m_cGOPEncoder.printOutSummary( m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_rprEnabled, m_spsMap.getFirstPS()->getBitDepths() ); } + int getLayerId() const { return m_layerId; } }; //! \} diff --git a/source/Lib/EncoderLib/EncLibCommon.cpp b/source/Lib/EncoderLib/EncLibCommon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2cbdc396efd15ff906123f2d7a73a2d429a48f1e --- /dev/null +++ b/source/Lib/EncoderLib/EncLibCommon.cpp @@ -0,0 +1,51 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file EncLibCommon.cpp + \brief Common encoder library class +*/ + +#include "CommonDef.h" +#include "EncLibCommon.h" + +EncLibCommon::EncLibCommon() + : m_apsIdStart( ALF_CTB_MAX_NUM_APS ) + , m_spsMap( MAX_NUM_SPS ) + , m_ppsMap( MAX_NUM_PPS ) + , m_apsMap( MAX_NUM_APS * MAX_NUM_APS_TYPE ) +{ +} + +EncLibCommon::~EncLibCommon() +{ +} diff --git a/source/Lib/EncoderLib/EncLibCommon.h b/source/Lib/EncoderLib/EncLibCommon.h new file mode 100644 index 0000000000000000000000000000000000000000..989fdf7e48a6c660bde88fe4418a916ae3f6fa84 --- /dev/null +++ b/source/Lib/EncoderLib/EncLibCommon.h @@ -0,0 +1,63 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2020, ITU/ISO/IEC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file EncLibCommon.h + \brief Common encoder library class (header) +*/ + +#pragma once +#include <list> +#include <fstream> +#include "CommonLib/Slice.h" + +class EncLibCommon +{ +private: + int m_apsIdStart; ///< ALF APS id, APS id space is shared across all layers + ParameterSetMap<SPS> m_spsMap; ///< SPS, it is shared across all layers + ParameterSetMap<PPS> m_ppsMap; ///< PPS, it is shared across all layers + ParameterSetMap<APS> m_apsMap; ///< APS, it is shared across all layers + PicList m_cListPic; ///< DPB, it is shared across all layers + +public: + EncLibCommon(); + virtual ~EncLibCommon(); + + int& getApsIdStart() { return m_apsIdStart; } + PicList& getPictureBuffer() { return m_cListPic; } + ParameterSetMap<SPS>& getSpsMap() { return m_spsMap; } + ParameterSetMap<PPS>& getPpsMap() { return m_ppsMap; } + ParameterSetMap<APS>& getApsMap() { return m_apsMap; } + +}; + diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index 0c8a5f7c6d4711c90895a3dd1cceb67cad5563a9..922835d58ac9ce7643923b6436c7b5e87d726262 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -143,7 +143,7 @@ void EncModeCtrl::xGetMinMaxQP( int& minQP, int& maxQP, const CodingStructure& c const unsigned subdivIncr = (splitMode == CU_QUAD_SPLIT) ? 2 : (splitMode == CU_BT_SPLIT) ? 
1 : 0; const bool qgEnable = partitioner.currQgEnable(); // QG possible at current level - const bool qgEnableChildren = qgEnable && ((partitioner.currSubdiv + subdivIncr) <= pps.getCuQpDeltaSubdiv()) && (subdivIncr > 0); // QG possible at next level + const bool qgEnableChildren = qgEnable && ((partitioner.currSubdiv + subdivIncr) <= cs.slice->getCuQpDeltaSubdiv()) && (subdivIncr > 0); // QG possible at next level const bool isLeafQG = (qgEnable && !qgEnableChildren); if( isLeafQG ) // QG at deepest level @@ -436,6 +436,14 @@ bool CacheBlkInfoCtrl::isSkip( const UnitArea& area ) return m_codedCUInfo[idx1][idx2][idx3][idx4]->isSkip; } +char CacheBlkInfoCtrl::getSelectColorSpaceOption(const UnitArea& area) +{ + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); + + return m_codedCUInfo[idx1][idx2][idx3][idx4]->selectColorSpaceOption; +} + bool CacheBlkInfoCtrl::isMMVDSkip(const UnitArea& area) { unsigned idx1, idx2, idx3, idx4; @@ -586,19 +594,19 @@ bool CacheBlkInfoCtrl::getInter(const UnitArea& area) return m_codedCUInfo[idx1][idx2][idx3][idx4]->isInter; } -void CacheBlkInfoCtrl::setGbiIdx(const UnitArea& area, uint8_t gBiIdx) +void CacheBlkInfoCtrl::setBcwIdx(const UnitArea& area, uint8_t gBiIdx) { unsigned idx1, idx2, idx3, idx4; getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); - m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx = gBiIdx; + m_codedCUInfo[idx1][idx2][idx3][idx4]->BcwIdx = gBiIdx; } -uint8_t CacheBlkInfoCtrl::getGbiIdx(const UnitArea& area) +uint8_t CacheBlkInfoCtrl::getBcwIdx(const UnitArea& area) { unsigned idx1, idx2, idx3, idx4; getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4); - return m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx; + return m_codedCUInfo[idx1][idx2][idx3][idx4]->BcwIdx; } #if REUSE_CU_RESULTS @@ -625,29 +633,6 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co const UnitArea &cmnAnc 
= ps[i - 1].parts[ps[i - 1].idx]; const UnitArea cuArea = CS::getArea( cs, cu, partitioner.chType ); - bool sharedListReuseMode = true; - if( - pu.mergeFlag == true && - cu.affine == false && - cu.predMode == MODE_INTER - ) - { - sharedListReuseMode = false; - - if ((cu.lumaSize().width*cu.lumaSize().height) >= MRG_SHARELIST_SHARSIZE) - { - sharedListReuseMode = true; - } - - if (((cmnAnc.lumaSize().width)*(cmnAnc.lumaSize().height) <= MRG_SHARELIST_SHARSIZE)) - { - sharedListReuseMode = true; - } - } - else - { - sharedListReuseMode = true; - } //#endif for( int i = 0; i < cmnAnc.blocks.size(); i++ ) @@ -657,11 +642,6 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co return false; } } - if(!sharedListReuseMode) - { - return false; - } - return true; } @@ -756,6 +736,12 @@ void BestEncInfoCache::destroy() delete[] m_pCoeff; delete[] m_pPcmBuf; + + if (m_runType != nullptr) + { + delete[] m_runType; + m_runType = nullptr; + } } void BestEncInfoCache::init( const Slice &slice ) @@ -796,14 +782,22 @@ void BestEncInfoCache::init( const Slice &slice ) #if REUSE_CU_RESULTS_WITH_MULTIPLE_TUS m_pCoeff = new TCoeff[numCoeff*MAX_NUM_TUS]; m_pPcmBuf = new Pel [numCoeff*MAX_NUM_TUS]; + if (slice.getSPS()->getPLTMode()) + { + m_runType = new bool[numCoeff*MAX_NUM_TUS]; + } #else m_pCoeff = new TCoeff[numCoeff]; m_pPcmBuf = new Pel [numCoeff]; + if (slice.getSPS()->getPLTMode()) + { + m_runType = new bool[numCoeff]; + } #endif TCoeff *coeffPtr = m_pCoeff; Pel *pcmPtr = m_pPcmBuf; - + bool *runTypePtr = m_runType; m_dummyCS.pcv = m_slice_bencinf->getPPS()->pcv; for( unsigned x = 0; x < numPos; x++ ) @@ -818,6 +812,7 @@ void BestEncInfoCache::init( const Slice &slice ) { TCoeff *coeff[MAX_NUM_TBLOCKS] = { 0, }; Pel *pcmbf[MAX_NUM_TBLOCKS] = { 0, }; + bool *runType[MAX_NUM_TBLOCKS - 1] = { 0, }; #if REUSE_CU_RESULTS_WITH_MULTIPLE_TUS for( int i = 0; i < MAX_NUM_TUS; i++ ) @@ -829,10 +824,14 @@ void BestEncInfoCache::init( const Slice &slice ) { 
coeff[i] = coeffPtr; coeffPtr += area.blocks[i].area(); pcmbf[i] = pcmPtr; pcmPtr += area.blocks[i].area(); + if (i < 2) + { + runType[i] = runTypePtr; runTypePtr += area.blocks[i].area(); + } } tu.cs = &m_dummyCS; - tu.init(coeff, pcmbf); + tu.init(coeff, pcmbf, runType); } #else const UnitArea &area = m_bestEncInfo[x][y][wIdx][hIdx]->tu; @@ -841,10 +840,12 @@ void BestEncInfoCache::init( const Slice &slice ) { coeff[i] = coeffPtr; coeffPtr += area.blocks[i].area(); pcmbf[i] = pcmPtr; pcmPtr += area.blocks[i].area(); + runType[i] = runTypePtr; runTypePtr += area.blocks[i].area(); + runLength[i] = runLengthPtr; runLengthPtr += area.blocks[i].area(); } m_bestEncInfo[x][y][wIdx][hIdx]->tu.cs = &m_dummyCS; - m_bestEncInfo[x][y][wIdx][hIdx]->tu.init( coeff, pcmbf ); + m_bestEncInfo[x][y][wIdx][hIdx]->tu.init(coeff, pcmbf, runLength, runType); #endif } } @@ -909,11 +910,19 @@ bool BestEncInfoCache::setFromCs( const CodingStructure& cs, const Partitioner& bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& partitioner, int qp ) { + if( partitioner.treeType == TREE_C ) + { + return false; //if save & load is allowed for chroma CUs, we should check whether luma info (pred, recon, etc) is the same, which is quite complex + } unsigned idx1, idx2, idx3, idx4; getAreaIdx( cs.area.Y(), *m_slice_bencinf->getPPS()->pcv, idx1, idx2, idx3, idx4 ); BestEncodingInfo& encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4]; + if( encInfo.cu.treeType != partitioner.treeType || encInfo.cu.modeType != partitioner.modeType ) + { + return false; + } if( encInfo.cu.qp != qp ) return false; if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner @@ -1111,12 +1120,11 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice ) } } - void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStructure& cs ) { // Min/max 
depth unsigned minDepth = 0; - unsigned maxDepth = g_aucLog2[cs.sps->getCTUSize()] - g_aucLog2[cs.sps->getMinQTSize( m_slice->getSliceType(), partitioner.chType )]; + unsigned maxDepth = floorLog2(cs.sps->getCTUSize()) - floorLog2(cs.sps->getMinQTSize( m_slice->getSliceType(), partitioner.chType )); if( m_pcEncCfg->getUseFastLCTU() ) { if( auto adPartitioner = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) ) @@ -1168,25 +1176,21 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // QP int baseQP = cs.baseQP; - if (!CS::isDualITree (cs) || isLuma (partitioner.chType)) + if (!partitioner.isSepTree(cs) || isLuma(partitioner.chType)) { if (m_pcEncCfg->getUseAdaptiveQP()) { baseQP = Clip3(-cs.sps->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, baseQP + xComputeDQP(cs, partitioner)); } #if ENABLE_QPA_SUB_CTU - else if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && cs.pps->getUseDQP() && cs.pps->getCuQpDeltaSubdiv() > 0) + else if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && cs.pps->getUseDQP() && cs.slice->getCuQpDeltaSubdiv() > 0) { const PreCalcValues &pcv = *cs.pcv; if ((partitioner.currArea().lwidth() < pcv.maxCUWidth) && (partitioner.currArea().lheight() < pcv.maxCUHeight) && cs.picture) { const Position &pos = partitioner.currQgPos; -#if MAX_TB_SIZE_SIGNALLING - const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth)]; -#else - const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth)]; -#endif + const unsigned mtsLog2 = (unsigned)floorLog2(std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth)); const unsigned stride = pcv.maxCUWidth >> mtsLog2; baseQP = cs.picture->m_subCtuQP[((pos.x & pcv.maxCUWidthMask) >> mtsLog2) + stride * ((pos.y & pcv.maxCUHeightMask) >> mtsLog2)]; @@ -1209,25 +1213,9 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru xGetMinMaxQP( minQP, maxQP, cs, 
partitioner, baseQP, *cs.sps, *cs.pps, CU_QUAD_SPLIT ); bool checkIbc = true; - if (cs.chType == CHANNEL_TYPE_CHROMA) + if (partitioner.chType == CHANNEL_TYPE_CHROMA) { - IbcLumaCoverage ibcLumaCoverage = cs.getIbcLumaCoverage(cs.area.Cb()); - switch (ibcLumaCoverage) - { - case IBC_LUMA_COVERAGE_FULL: - // check IBC - break; - case IBC_LUMA_COVERAGE_PARTIAL: - // do not check IBC - checkIbc = false; - break; - case IBC_LUMA_COVERAGE_NONE: - // do not check IBC - checkIbc = false; - break; - default: - THROW("Unknown IBC luma coverage type"); - } + checkIbc = false; } // Add coding modes here // NOTE: Working back to front, as a stack, which is more efficient with the container @@ -1240,7 +1228,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru { for( int qp = maxQP; qp >= minQP; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp } ); } } @@ -1249,7 +1237,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // add split modes for( int qp = maxQP; qp >= minQP; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_V, ETO_STANDARD, qp, false } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_V, ETO_STANDARD, qp } ); } } @@ -1258,7 +1246,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // add split modes for( int qp = maxQP; qp >= minQP; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_H, ETO_STANDARD, qp, false } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_H, ETO_STANDARD, qp } ); } } @@ -1270,7 +1258,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // add split modes for( int qp = maxQP; qp >= minQP; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_V, ETO_STANDARD, qp, false } ); + 
m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_V, ETO_STANDARD, qp } ); } m_ComprCUCtxList.back().set( DID_VERT_SPLIT, true ); } @@ -1284,7 +1272,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru // add split modes for( int qp = maxQP; qp >= minQP; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_H, ETO_STANDARD, qp, false } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_H, ETO_STANDARD, qp } ); } m_ComprCUCtxList.back().set( DID_HORZ_SPLIT, true ); } @@ -1297,7 +1285,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru { for( int qp = maxQPq; qp >= minQPq; qp-- ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp } ); } } @@ -1305,93 +1293,107 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, CU_DONT_SPLIT ); - bool useLossless = false; int lowestQP = minQP; - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - useLossless = true; // mark that the first iteration is to cost TQB mode. - minQP = minQP - 1; // increase loop variable range by 1, to allow testing of TQB mode along with other QPs - - if( m_pcEncCfg->getCUTransquantBypassFlagForceValue() ) - { - maxQP = minQP; - } - } ////////////////////////////////////////////////////////////////////////// // Add unit coding modes: Intra, InterME, InterMerge ... 
+ bool tryIntraRdo = true; + bool tryInterRdo = true; + bool tryIBCRdo = true; + if( partitioner.isConsIntra() ) + { + tryInterRdo = false; + } + else if( partitioner.isConsInter() ) + { + tryIntraRdo = tryIBCRdo = false; + } + checkIbc &= tryIBCRdo; for( int qpLoop = maxQP; qpLoop >= minQP; qpLoop-- ) { const int qp = std::max( qpLoop, lowestQP ); - const bool lossless = useLossless && qpLoop == minQP; #if REUSE_CU_RESULTS const bool isReusingCu = isValid( cs, partitioner, qp ); cuECtx.set( IS_REUSING_CU, isReusingCu ); if( isReusingCu ) { - m_ComprCUCtxList.back().testModes.push_back( {ETM_RECO_CACHED, ETO_STANDARD, qp, lossless} ); + m_ComprCUCtxList.back().testModes.push_back( {ETM_RECO_CACHED, ETO_STANDARD, qp} ); } #endif // add intra modes - m_ComprCUCtxList.back().testModes.push_back( { ETM_IPCM, ETO_STANDARD, qp, lossless } ); - m_ComprCUCtxList.back().testModes.push_back( { ETM_INTRA, ETO_STANDARD, qp, lossless } ); + if( tryIntraRdo ) + { + if (cs.slice->getSPS()->getPLTMode() && ( cs.slice->isIRAP() || (cs.area.lwidth() == 4 && cs.area.lheight() == 4) ) && getPltEnc() ) + { + m_ComprCUCtxList.back().testModes.push_back({ ETM_PALETTE, ETO_STANDARD, qp }); + } + m_ComprCUCtxList.back().testModes.push_back( { ETM_INTRA, ETO_STANDARD, qp } ); + if (cs.slice->getSPS()->getPLTMode() && !cs.slice->isIRAP() && !(cs.area.lwidth() == 4 && cs.area.lheight() == 4) && getPltEnc() ) + { + m_ComprCUCtxList.back().testModes.push_back({ ETM_PALETTE, ETO_STANDARD, qp }); + } + } // add ibc mode to intra path if (cs.sps->getIBCFlag() && checkIbc) { - m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC, ETO_STANDARD, qp, lossless }); - if (cs.chType == CHANNEL_TYPE_LUMA) + m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC, ETO_STANDARD, qp }); + if (partitioner.chType == CHANNEL_TYPE_LUMA) { - m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC_MERGE, ETO_STANDARD, qp, lossless }); + m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC_MERGE, ETO_STANDARD, qp }); 
} } } // add first pass modes - if( !m_slice->isIRAP() ) + if ( !m_slice->isIRAP() && !( cs.area.lwidth() == 4 && cs.area.lheight() == 4 ) && tryInterRdo ) { for( int qpLoop = maxQP; qpLoop >= minQP; qpLoop-- ) { const int qp = std::max( qpLoop, lowestQP ); - const bool lossless = useLossless && qpLoop == minQP; + if (m_pcEncCfg->getIMV()) + { + m_ComprCUCtxList.back().testModes.push_back({ ETM_INTER_ME, EncTestModeOpts( 4 << ETO_IMV_SHIFT ), qp }); + } if( m_pcEncCfg->getIMV() || m_pcEncCfg->getUseAffineAmvr() ) { int imv = m_pcEncCfg->getIMV4PelFast() ? 3 : 2; - m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( imv << ETO_IMV_SHIFT ), qp, lossless } ); - m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( 1 << ETO_IMV_SHIFT ), qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( imv << ETO_IMV_SHIFT ), qp } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( 1 << ETO_IMV_SHIFT ), qp } ); } // add inter modes if( m_pcEncCfg->getUseEarlySkipDetection() ) { if( cs.sps->getUseTriangle() && cs.slice->isInterB() ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp } ); } - m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp } ); if ( cs.sps->getUseAffine() || cs.sps->getSBTMVPEnabledFlag() ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp } ); } - m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, ETO_STANDARD, qp } ); } else { - 
m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, ETO_STANDARD, qp } ); if( cs.sps->getUseTriangle() && cs.slice->isInterB() ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp } ); } - m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP, ETO_STANDARD, qp } ); if ( cs.sps->getUseAffine() || cs.sps->getSBTMVPEnabledFlag() ) { - m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp, lossless } ); + m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE, ETO_STANDARD, qp } ); } } if (m_pcEncCfg->getUseHashME()) { - if ((cs.area.lwidth() == cs.area.lheight() && cs.area.lwidth() <= 64 && cs.area.lwidth() >= 4) || (cs.area.lwidth() == 4 && cs.area.lheight() == 8) || (cs.area.lwidth() == 8 && cs.area.lheight() == 4)) + int minSize = min(cs.area.lwidth(), cs.area.lheight()); + if (minSize < 128 && minSize >= 4) { - m_ComprCUCtxList.back().testModes.push_back({ ETM_HASH_INTER, ETO_STANDARD, qp, lossless }); + m_ComprCUCtxList.back().testModes.push_back({ ETM_HASH_INTER, ETO_STANDARD, qp }); } } } @@ -1417,7 +1419,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt ComprCUCtx& cuECtx = m_ComprCUCtxList.back(); // Fast checks, partitioning depended - if (cuECtx.isHashPerfectMatch && encTestmode.type != ETM_MERGE_SKIP && encTestmode.type != ETM_AFFINE && encTestmode.type != ETM_MERGE_TRIANGLE) + if (cuECtx.isHashPerfectMatch && encTestmode.type != ETM_MERGE_SKIP && encTestmode.type != ETM_INTER_ME && encTestmode.type != ETM_AFFINE && encTestmode.type != ETM_MERGE_TRIANGLE) { return false; } @@ -1459,9 +1461,6 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& 
encTestmode, const CodingSt const SPS& sps = *slice.getSPS(); const uint32_t numComp = getNumberValidComponents( slice.getSPS()->getChromaFormatIdc() ); const uint32_t width = partitioner.currArea().lumaSize().width; -#if FIX_PCM - const uint32_t height = partitioner.currArea().lumaSize().height; -#endif const CodingStructure *bestCS = cuECtx.bestCS; const CodingUnit *bestCU = cuECtx.bestCU; const EncTestMode bestMode = bestCS ? getCSEncMode( *bestCS ) : EncTestMode(); @@ -1513,6 +1512,14 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt // INTRA MODES if (cs.sps->getIBCFlag() && !cuECtx.bestTU) return true; + if( partitioner.isConsIntra() && !cuECtx.bestTU ) + { + return true; + } + if ( partitioner.currArea().lumaSize().width == 4 && partitioner.currArea().lumaSize().height == 4 && !slice.isIntra() && !cuECtx.bestTU ) + { + return true; + } if( !( slice.isIRAP() || bestMode.type == ETM_INTRA || !cuECtx.bestTU || ((!m_pcEncCfg->getDisableIntraPUsInInterSlices()) && (!relatedCU.isInter || !relatedCU.isIBC) && ( ( cuECtx.bestTU->cbf[0] != 0 ) || @@ -1533,46 +1540,57 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt if( lastTestMode().type != ETM_INTRA && cuECtx.bestCS && cuECtx.bestCU && interHadActive( cuECtx ) ) { // Get SATD threshold from best Inter-CU - if( !cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() ) + if (!cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() && !cs.slice->getDisableSATDForRD()) { CodingUnit* bestCU = cuECtx.bestCU; if (bestCU && !CU::isIntra(*bestCU)) { DistParam distParam; - const bool useHad = !bestCU->transQuantBypass; + const bool useHad = true; m_pcRdCost->setDistParam( distParam, cs.getOrgBuf( COMPONENT_Y ), cuECtx.bestCS->getPredBuf( COMPONENT_Y ), cs.sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHad ); cuECtx.interHad = distParam.distFunc( distParam ); } } } - + if (bestMode.type == ETM_PALETTE && !slice.isIRAP() && !( 
partitioner.currArea().lumaSize().width == 4 && partitioner.currArea().lumaSize().height == 4) ) // inter slice + { + return false; + } + if ( m_pcEncCfg->getUseFastISP() && relatedCU.relatedCuIsValid ) + { + cuECtx.ispPredModeVal = relatedCU.ispPredModeVal; + cuECtx.bestDCT2NonISPCost = relatedCU.bestDCT2NonISPCost; + cuECtx.relatedCuIsValid = relatedCU.relatedCuIsValid; + cuECtx.bestNonDCT2Cost = relatedCU.bestNonDCT2Cost; + cuECtx.bestISPIntraMode = relatedCU.bestISPIntraMode; + } return true; } - else if( encTestmode.type == ETM_IPCM ) + else if (encTestmode.type == ETM_PALETTE) { - if( getFastDeltaQp() ) + if (partitioner.currArea().lumaSize().width > 64 || partitioner.currArea().lumaSize().height > 64) { - const SPS &sps = *cs.sps; - const uint32_t fastDeltaQPCuMaxPCMSize = Clip3( ( uint32_t ) 1 << sps.getPCMLog2MinSize(), ( uint32_t ) 1 << sps.getPCMLog2MaxSize(), 32u ); - - if( cs.area.lumaSize().width > fastDeltaQPCuMaxPCMSize ) + return false; + } + const Area curr_cu = CS::getArea(cs, cs.area, partitioner.chType).blocks[getFirstComponentOfChannel(partitioner.chType)]; + try + { + double stored_cost = slice.m_mapPltCost.at(curr_cu.pos()).at(curr_cu.size()); + if (bestMode.type != ETM_INVALID && stored_cost > cuECtx.bestCS->cost) { - return false; // only check necessary PCM in fast deltaqp mode + return false; } } - - // PCM MODES -#if FIX_PCM - return sps.getPCMEnabledFlag() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() ) - && height <= ( 1 << sps.getPCMLog2MaxSize() ) && height >= ( 1 << sps.getPCMLog2MinSize() ); -#else - return sps.getPCMEnabledFlag() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() ); -#endif + catch (const std::out_of_range &) + { + // do nothing if no stored cost value was found. 
+ } + return true; } else if (encTestmode.type == ETM_IBC || encTestmode.type == ETM_IBC_MERGE) { // IBC MODES - return sps.getIBCFlag() && width <= IBC_MAX_CAND_SIZE && partitioner.currArea().lumaSize().height <= IBC_MAX_CAND_SIZE; + return sps.getIBCFlag() && (partitioner.currArea().lumaSize().width < 128 && partitioner.currArea().lumaSize().height < 128); } else if( isModeInter( encTestmode ) ) { @@ -1817,7 +1835,15 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt else { CHECK( encTestmode.type != ETM_POST_DONT_SPLIT, "Unknown mode" ); - + if ((cuECtx.get<double>(BEST_NO_IMV_COST) == (MAX_DOUBLE * .5) || cuECtx.get<bool>(IS_REUSING_CU)) && !slice.isIntra()) + { + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(partitioner.currArea().Y(), *slice.getPPS()->pcv, idx1, idx2, idx3, idx4); + if (g_isReusedUniMVsFilled[idx1][idx2][idx3][idx4]) + { + m_pcInterSearch->insertUniMvCands(partitioner.currArea().Y(), g_reusedUniMVs[idx1][idx2][idx3][idx4]); + } + } if( !bestCS || ( bestCS && isModeSplit( bestMode ) ) ) { return false; @@ -1828,30 +1854,104 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt setFromCs( *bestCS, partitioner ); #endif + if( partitioner.modeType == MODE_TYPE_INTRA && partitioner.chType == CHANNEL_TYPE_LUMA ) + { + return false; //not set best coding mode for intra coding pass + } // assume the non-split modes are done and set the marks for the best found mode if( bestCS && bestCU ) { if( CU::isInter( *bestCU ) ) { relatedCU.isInter = true; -#if HM_CODED_CU_INFO relatedCU.isSkip |= bestCU->skip; relatedCU.isMMVDSkip |= bestCU->mmvdSkip; -#else - relatedCU.isSkip = bestCU->skip; -#endif - relatedCU.GBiIdx = bestCU->GBiIdx; + relatedCU.BcwIdx = bestCU->BcwIdx; + if (bestCU->slice->getSPS()->getUseColorTrans()) + { + if (m_pcEncCfg->getRGBFormatFlag()) + { + if (bestCU->colorTransform && bestCU->rootCbf) + { + relatedCU.selectColorSpaceOption = 1; + } + else + { + 
relatedCU.selectColorSpaceOption = 2; + } + } + else + { + if (!bestCU->colorTransform || !bestCU->rootCbf) + { + relatedCU.selectColorSpaceOption = 1; + } + else + { + relatedCU.selectColorSpaceOption = 2; + } + } + } } else if (CU::isIBC(*bestCU)) { relatedCU.isIBC = true; -#if HM_CODED_CU_INFO relatedCU.isSkip |= bestCU->skip; -#endif + if (bestCU->slice->getSPS()->getUseColorTrans()) + { + if (m_pcEncCfg->getRGBFormatFlag()) + { + if (bestCU->colorTransform && bestCU->rootCbf) + { + relatedCU.selectColorSpaceOption = 1; + } + else + { + relatedCU.selectColorSpaceOption = 2; + } + } + else + { + if (!bestCU->colorTransform || !bestCU->rootCbf) + { + relatedCU.selectColorSpaceOption = 1; + } + else + { + relatedCU.selectColorSpaceOption = 2; + } + } + } } else if( CU::isIntra( *bestCU ) ) { relatedCU.isIntra = true; + if ( m_pcEncCfg->getUseFastISP() && cuECtx.ispWasTested && ( !relatedCU.relatedCuIsValid || bestCS->cost < relatedCU.bestCost ) ) + { + // Compact data + int bit0 = true; + int bit1 = cuECtx.ispMode == NOT_INTRA_SUBPARTITIONS ? 
1 : 0; + int bit2 = cuECtx.ispMode == VER_INTRA_SUBPARTITIONS; + int bit3 = cuECtx.ispLfnstIdx > 0; + int bit4 = cuECtx.ispLfnstIdx == 2; + int bit5 = cuECtx.mipFlag; + int bit6 = cuECtx.bestCostIsp < cuECtx.bestNonDCT2Cost * 0.95; + int val = + (bit0) | + (bit1 << 1) | + (bit2 << 2) | + (bit3 << 3) | + (bit4 << 4) | + (bit5 << 5) | + (bit6 << 6) | + ( cuECtx.bestPredModeDCT2 << 9 ); + relatedCU.ispPredModeVal = val; + relatedCU.bestDCT2NonISPCost = cuECtx.bestDCT2NonISPCost; + relatedCU.bestCost = bestCS->cost; + relatedCU.bestNonDCT2Cost = cuECtx.bestNonDCT2Cost; + relatedCU.bestISPIntraMode = cuECtx.bestISPIntraMode; + relatedCU.relatedCuIsValid = true; + } } #if ENABLE_SPLIT_PARALLELISM #if REUSE_CU_RESULTS @@ -1867,6 +1967,25 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt } } +bool EncModeCtrlMTnoRQT::checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) +{ + xExtractFeatures( encTestmode, *tempCS ); + + ComprCUCtx& cuECtx = m_ComprCUCtxList.back(); + bool skipOtherLfnst = false; + + if( encTestmode.type == ETM_INTRA ) + { + if( !cuECtx.bestCS || ( tempCS->cost >= cuECtx.bestCS->cost && cuECtx.bestCS->cus.size() == 1 && CU::isIntra( *cuECtx.bestCS->cus[ 0 ] ) ) + || ( tempCS->cost < cuECtx.bestCS->cost && CU::isIntra( *tempCS->cus[ 0 ] ) ) ) + { + skipOtherLfnst = !tempCS->cus[ 0 ]->rootCbf; + } + } + + return skipOtherLfnst; +} + bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) { xExtractFeatures( encTestmode, *tempCS ); @@ -1890,6 +2009,19 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt { cuECtx.set( BEST_TRIV_SPLIT_COST, tempCS->cost ); } + else if( encTestmode.type == ETM_INTRA ) + { + const CodingUnit cu = *tempCS->getCU( partitioner.chType ); + + if( !cu.mtsFlag ) + { + cuECtx.bestMtsSize2Nx2N1stPass = tempCS->cost; + } + if( !cu.ispMode ) + { + 
cuECtx.bestCostMtsFirstPassNoIsp = tempCS->cost; + } + } if( m_pcEncCfg->getIMV4PelFast() && m_pcEncCfg->getIMV() && encTestmode.type == ETM_INTER_ME ) { @@ -2027,7 +2159,7 @@ bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner const int parlAt = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256; if( cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true; - return false; + return false; } bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index f8ae006d18aa7f44f82aa8df76e972259d5a5f44..3ab1b298bafc255d2754a5eb69f176fec5b7918a 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ #include "CommonLib/CommonDef.h" #include "CommonLib/CodingStructure.h" +#include "InterSearch.h" #include <typeinfo> #include <vector> @@ -60,7 +61,7 @@ enum EncTestModeType ETM_AFFINE, ETM_MERGE_TRIANGLE, ETM_INTRA, - ETM_IPCM, + ETM_PALETTE, ETM_SPLIT_QT, ETM_SPLIT_BT_H, ETM_SPLIT_BT_V, @@ -97,18 +98,18 @@ static void getAreaIdx(const Area& area, const PreCalcValues &pcv, unsigned &idx struct EncTestMode { EncTestMode() - : type( ETM_INVALID ), opts( ETO_INVALID ), qp( -1 ), lossless( false ) {} + : type( ETM_INVALID ), opts( ETO_INVALID ), qp( -1 ) {} EncTestMode( EncTestModeType _type ) - : type( _type ), opts( ETO_STANDARD ), qp( -1 ), lossless( false ) {} - EncTestMode( EncTestModeType _type, int _qp, bool _lossless ) - : type( _type ), opts( ETO_STANDARD ), qp( _qp ), lossless( _lossless ) {} - EncTestMode( EncTestModeType _type, EncTestModeOpts _opts, int _qp, bool _lossless ) - : type( _type ), opts( _opts ), qp( _qp ), lossless( _lossless ) {} + : type( _type ), opts( ETO_STANDARD ), qp( -1 ) {} + EncTestMode( EncTestModeType _type, int _qp ) + : type( _type ), opts( ETO_STANDARD ), qp( _qp ) {} + EncTestMode( EncTestModeType _type, EncTestModeOpts _opts, int _qp ) + : type( _type ), opts( _opts ), qp( _qp ) {} EncTestModeType type; EncTestModeOpts opts; int qp; - bool lossless; + double maxCostAllowed; }; @@ -188,12 +189,35 @@ struct ComprCUCtx , extraFeatures ( ) , extraFeaturesd( ) , bestInterCost ( MAX_DOUBLE ) + , bestMtsSize2Nx2N1stPass + ( MAX_DOUBLE ) + , skipSecondMTSPass + ( false ) , interHad (std::numeric_limits<Distortion>::max()) #if ENABLE_SPLIT_PARALLELISM , isLevelSplitParallel ( false ) #endif , bestCostWithoutSplitFlags( MAX_DOUBLE ) + , bestCostMtsFirstPassNoIsp( MAX_DOUBLE ) + , bestCostIsp ( MAX_DOUBLE ) + , ispWasTested ( false ) + , bestPredModeDCT2 + ( UINT8_MAX ) + , relatedCuIsValid + ( false ) + , ispPredModeVal( 0 ) + , bestDCT2NonISPCost 
+ ( MAX_DOUBLE ) + , bestNonDCT2Cost + ( MAX_DOUBLE ) + , bestISPIntraMode + ( UINT8_MAX ) + , mipFlag ( false ) + , ispMode ( NOT_INTRA_SUBPARTITIONS ) + , ispLfnstIdx ( 0 ) + , stopNonDCT2Transforms + ( false ) { getAreaIdx( cs.area.Y(), *cs.pcv, cuX, cuY, cuW, cuH ); partIdx = ( ( cuX << 8 ) | cuY ); @@ -218,11 +242,26 @@ struct ComprCUCtx static_vector<int64_t, 30> extraFeatures; static_vector<double, 30> extraFeaturesd; double bestInterCost; + double bestMtsSize2Nx2N1stPass; + bool skipSecondMTSPass; Distortion interHad; #if ENABLE_SPLIT_PARALLELISM bool isLevelSplitParallel; #endif double bestCostWithoutSplitFlags; + double bestCostMtsFirstPassNoIsp; + double bestCostIsp; + bool ispWasTested; + uint16_t bestPredModeDCT2; + bool relatedCuIsValid; + uint16_t ispPredModeVal; + double bestDCT2NonISPCost; + double bestNonDCT2Cost; + uint8_t bestISPIntraMode; + bool mipFlag; + uint8_t ispMode; + uint8_t ispLfnstIdx; + bool stopNonDCT2Transforms; template<typename T> T get( int ft ) const { return typeid(T) == typeid(double) ? 
(T&)extraFeaturesd[ft] : T(extraFeatures[ft]); } template<typename T> void set( int ft, T val ) { extraFeatures [ft] = int64_t( val ); } @@ -250,6 +289,9 @@ protected: #if ENABLE_SPLIT_PARALLELISM int m_runNextInParallel; #endif + InterSearch* m_pcInterSearch; + + bool m_doPlt; public: @@ -268,6 +310,7 @@ protected: public: virtual bool useModeResult ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; + virtual bool checkSkipOtherLfnst ( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ) = 0; #if ENABLE_SPLIT_PARALLELISM virtual void copyState ( const EncModeCtrl& other, const UnitArea& area ); virtual int getNumParallelJobs ( const CodingStructure &cs, Partitioner& partitioner ) const { return 1; } @@ -299,8 +342,37 @@ public: double getBestInterCost () const { return m_ComprCUCtxList.back().bestInterCost; } Distortion getInterHad () const { return m_ComprCUCtxList.back().interHad; } void enforceInterHad ( Distortion had ) { m_ComprCUCtxList.back().interHad = had; } + double getMtsSize2Nx2NFirstPassCost () const { return m_ComprCUCtxList.back().bestMtsSize2Nx2N1stPass; } + bool getSkipSecondMTSPass () const { return m_ComprCUCtxList.back().skipSecondMTSPass; } + void setSkipSecondMTSPass ( bool b ) { m_ComprCUCtxList.back().skipSecondMTSPass = b; } double getBestCostWithoutSplitFlags () const { return m_ComprCUCtxList.back().bestCostWithoutSplitFlags; } void setBestCostWithoutSplitFlags ( double cost ) { m_ComprCUCtxList.back().bestCostWithoutSplitFlags = cost; } + double getMtsFirstPassNoIspCost () const { return m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp; } + void setMtsFirstPassNoIspCost ( double cost ) { m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp = cost; } + double getIspCost () const { return m_ComprCUCtxList.back().bestCostIsp; } + void setIspCost ( double val ) { m_ComprCUCtxList.back().bestCostIsp = val; } + bool getISPWasTested () const { return 
m_ComprCUCtxList.back().ispWasTested; } + void setISPWasTested ( bool val ) { m_ComprCUCtxList.back().ispWasTested = val; } + void setBestPredModeDCT2 ( uint16_t val ) { m_ComprCUCtxList.back().bestPredModeDCT2 = val; } + uint16_t getBestPredModeDCT2 () const { return m_ComprCUCtxList.back().bestPredModeDCT2; } + bool getRelatedCuIsValid () const { return m_ComprCUCtxList.back().relatedCuIsValid; } + void setRelatedCuIsValid ( bool val ) { m_ComprCUCtxList.back().relatedCuIsValid = val; } + uint16_t getIspPredModeValRelCU () const { return m_ComprCUCtxList.back().ispPredModeVal; } + void setIspPredModeValRelCU ( uint16_t val ) { m_ComprCUCtxList.back().ispPredModeVal = val; } + double getBestDCT2NonISPCostRelCU () const { return m_ComprCUCtxList.back().bestDCT2NonISPCost; } + void setBestDCT2NonISPCostRelCU ( double val ) { m_ComprCUCtxList.back().bestDCT2NonISPCost = val; } + double getBestNonDCT2Cost () const { return m_ComprCUCtxList.back().bestNonDCT2Cost; } + void setBestNonDCT2Cost ( double val ) { m_ComprCUCtxList.back().bestNonDCT2Cost = val; } + uint8_t getBestISPIntraModeRelCU () const { return m_ComprCUCtxList.back().bestISPIntraMode; } + void setBestISPIntraModeRelCU ( uint8_t val ) { m_ComprCUCtxList.back().bestISPIntraMode = val; } + void setMIPFlagISPPass ( bool val ) { m_ComprCUCtxList.back().mipFlag = val; } + void setISPMode ( uint8_t val ) { m_ComprCUCtxList.back().ispMode = val; } + void setISPLfnstIdx ( uint8_t val ) { m_ComprCUCtxList.back().ispLfnstIdx = val; } + bool getStopNonDCT2Transforms () const { return m_ComprCUCtxList.back().stopNonDCT2Transforms; } + void setStopNonDCT2Transforms ( bool val ) { m_ComprCUCtxList.back().stopNonDCT2Transforms = val; } + void setInterSearch (InterSearch* pcInterSearch) { m_pcInterSearch = pcInterSearch; } + void setPltEnc ( bool b ) { m_doPlt = b; } + bool getPltEnc() const { return m_doPlt; } protected: void xExtractFeatures ( const EncTestMode encTestmode, CodingStructure& cs ); @@ -359,7 +431,14 @@ 
struct CodedCUInfo bool validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; Mv saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; - uint8_t GBiIdx; + uint8_t BcwIdx; + char selectColorSpaceOption; // 0 - test both two color spaces; 1 - only test the first color spaces; 2 - only test the second color spaces + uint16_t ispPredModeVal; + double bestDCT2NonISPCost; + double bestCost; + double bestNonDCT2Cost; + bool relatedCuIsValid; + uint8_t bestISPIntraMode; #if ENABLE_SPLIT_PARALLELISM @@ -409,8 +488,10 @@ public: void setMv ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, const Mv& rMv ); bool getInter( const UnitArea& area ); - void setGbiIdx( const UnitArea& area, uint8_t gBiIdx ); - uint8_t getGbiIdx( const UnitArea& area ); + void setBcwIdx( const UnitArea& area, uint8_t gBiIdx ); + uint8_t getBcwIdx( const UnitArea& area ); + + char getSelectColorSpaceOption(const UnitArea& area); }; #if REUSE_CU_RESULTS @@ -442,6 +523,7 @@ private: BestEncodingInfo ***m_bestEncInfo[MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2]; TCoeff *m_pCoeff; Pel *m_pPcmBuf; + bool *m_runType; CodingStructure m_dummyCS; XUCache m_dummyCache; #if ENABLE_SPLIT_PARALLELISM @@ -527,6 +609,7 @@ public: virtual bool isParallelSplit ( const CodingStructure &cs, Partitioner& partitioner ) const; virtual bool parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const; #endif + virtual bool checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner ); }; diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp index b09eb70d244916af5572a5b95c31fb7b701393bb..27d5ae535f845188a40c21b020262bf7cf6d89ea 100644 --- a/source/Lib/EncoderLib/EncReshape.cpp +++ b/source/Lib/EncoderLib/EncReshape.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -78,6 +78,12 @@ void EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui m_binImportance.resize(PIC_ANALYZE_CW_BINS); if (m_reshapePivot.empty()) m_reshapePivot.resize(PIC_CODE_CW_BINS + 1, 0); + if (m_inputPivot.empty()) + m_inputPivot.resize(PIC_CODE_CW_BINS + 1, 0); + if (m_fwdScaleCoef.empty()) + m_fwdScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC); + if (m_invScaleCoef.empty()) + m_invScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC); if (m_chromaAdjHelpLUT.empty()) m_chromaAdjHelpLUT.resize(PIC_CODE_CW_BINS, 1<<CSCALE_FP_PREC); @@ -87,6 +93,7 @@ void EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui m_sliceReshapeInfo.reshaperModelMinBinIdx = 0; m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1; memset(m_sliceReshapeInfo.reshaperModelBinCWDelta, 0, (PIC_CODE_CW_BINS) * sizeof(int)); + m_sliceReshapeInfo.chrResScalingOffset = 0; m_picWidth = picWidth; m_picHeight = picHeight; @@ -95,6 +102,9 @@ void EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui m_widthInCtus = (m_picWidth + m_maxCUWidth - 1) / m_maxCUWidth; m_heightInCtus = (m_picHeight + m_maxCUHeight - 1) / m_maxCUHeight; m_numCtuInFrame = m_widthInCtus * m_heightInCtus; + m_binNum = PIC_CODE_CW_BINS; + initSeqStats(m_srcSeqStats); + initSeqStats(m_rspSeqStats); } void EncReshape::destroy() @@ -111,18 +121,9 @@ void EncReshape::preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const if (m_lumaBD >= 10) { m_sliceReshapeInfo.sliceReshaperEnableFlag = true; - if (reshapeCW.rspIntraPeriod == 1) - { - if (pcPic->getPOC() == 0) { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true; } - else { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; } - } - else - { if (sliceType == I_SLICE ) { 
m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true; } else { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; } - } - if (sliceType == I_SLICE && isDualT) { m_sliceReshapeInfo.enableChromaAdj = 0; } - else { m_sliceReshapeInfo.enableChromaAdj = 1; } + { m_sliceReshapeInfo.enableChromaAdj = 1; } } else { @@ -137,343 +138,414 @@ void EncReshape::preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const \param sliceType describe the slice type \param reshapeCW describe some input info */ -void EncReshape::preAnalyzerSDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT) +void EncReshape::initSeqStats(SeqInfo &stats) { - m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true; - m_sliceReshapeInfo.sliceReshaperEnableFlag = true; - - int modIP = pcPic->getPOC() - pcPic->getPOC() / reshapeCW.rspFpsToIp * reshapeCW.rspFpsToIp; - if (sliceType == I_SLICE || (reshapeCW.rspIntraPeriod == -1 && modIP == 0)) + for (int i = 0; i < m_binNum; i++) { - if (m_sliceReshapeInfo.sliceReshaperModelPresentFlag == true) + stats.binVar[i] = 0.0; + stats.binHist[i] = 0.0; + stats.normVar[i] = 0.0; + } + stats.nonZeroCnt = 0; + stats.weightVar = 0.0; + stats.weightNorm = 0.0; + stats.minBinVar = 0.0; + stats.maxBinVar = 0.0; + stats.meanBinVar = 0.0; + stats.ratioStdU = 0.0; + stats.ratioStdV = 0.0; +} +void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats) +{ + PelBuf picY = pcPic->getOrigBuf(COMPONENT_Y); + const int width = picY.width; + const int height = picY.height; + const int stride = picY.stride; + uint32_t winLens = (m_binNum == PIC_CODE_CW_BINS) ? (std::min(height, width) / 240) : 2; + winLens = winLens > 0 ? 
winLens : 1; + + int64_t tempSq = 0; + int64_t topSum = 0, topSumSq = 0; + int64_t leftSum = 0, leftSumSq = 0; + int64_t *leftColSum = new int64_t[width]; + int64_t *leftColSumSq = new int64_t[width]; + int64_t *topRowSum = new int64_t[height]; + int64_t *topRowSumSq = new int64_t[height]; + int64_t *topColSum = new int64_t[width]; + int64_t *topColSumSq = new int64_t[width]; + uint32_t *binCnt = new uint32_t[m_binNum]; + memset(leftColSum, 0, width * sizeof(int64_t)); + memset(leftColSumSq, 0, width * sizeof(int64_t)); + memset(topRowSum, 0, height * sizeof(int64_t)); + memset(topRowSumSq, 0, height * sizeof(int64_t)); + memset(topColSum, 0, width * sizeof(int64_t)); + memset(topColSumSq, 0, width * sizeof(int64_t)); + memset(binCnt, 0, m_binNum * sizeof(uint32_t)); + + initSeqStats(stats); + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) { - int stdMin = 16 <<(m_lumaBD-8); - int stdMax = 235 << (m_lumaBD - 8); - int binLen = m_reshapeLUTSize / PIC_ANALYZE_CW_BINS; - - m_reshapeCW = reshapeCW; - m_initCWAnalyze = binLen; - - for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++) + const Pel pxlY = picY.buf[x]; + int64_t sum = 0, sumSq = 0; + uint32_t numPixInPart = 0; + uint32_t y1 = std::max((int)(y - winLens), 0); + uint32_t y2 = std::min((int)(y + winLens), (height - 1)); + uint32_t x1 = std::max((int)(x - winLens), 0); + uint32_t x2 = std::min((int)(x + winLens), (width - 1)); + uint32_t bx = 0, by = 0; + const Pel *pWinY = &picY.buf[0]; + numPixInPart = (x2 - x1 + 1) * (y2 - y1 + 1); + + if (x == 0 && y == 0) { - m_binImportance[b] = 0; - m_binCW[b] = binLen; + for (by = y1; by <= y2; by++) + { + for (bx = x1; bx <= x2; bx++) + { + tempSq = pWinY[bx] * pWinY[bx]; + leftSum += pWinY[bx]; + leftSumSq += tempSq; + leftColSum[bx] += pWinY[bx]; + leftColSumSq[bx] += tempSq; + topColSum[bx] += pWinY[bx]; + topColSumSq[bx] += tempSq; + topRowSum[by] += pWinY[bx]; + topRowSumSq[by] += tempSq; + } + pWinY += stride; + } + topSum = leftSum; + 
topSumSq = leftSumSq; + sum = leftSum; + sumSq = leftSumSq; } - - int startBinIdx = stdMin / binLen; - int endBinIdx = stdMax / binLen; - m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx; - m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx; - - PelBuf picY = pcPic->getOrigBuf(COMPONENT_Y); - const int width = picY.width; - const int height = picY.height; - const int stride = picY.stride; - - double blockBinVarSum[PIC_ANALYZE_CW_BINS] = { 0.0 }; - uint32_t bockBinCnt[PIC_ANALYZE_CW_BINS] = { 0 }; - - const int PIC_ANALYZE_WIN_SIZE = 5; - const uint32_t winSize = PIC_ANALYZE_WIN_SIZE; - const uint32_t winLens = (winSize - 1) >> 1; - - int64_t tempSq = 0; - int64_t leftSum = 0, leftSumSq = 0; - int64_t *leftColSum = new int64_t[width]; - int64_t *leftColSumSq = new int64_t[width]; - memset(leftColSum, 0, width * sizeof(int64_t)); - memset(leftColSumSq, 0, width * sizeof(int64_t)); - int64_t topSum = 0, topSumSq = 0; - int64_t *topRowSum = new int64_t[height]; - int64_t *topRowSumSq = new int64_t[height]; - memset(topRowSum, 0, height * sizeof(int64_t)); - memset(topRowSumSq, 0, height * sizeof(int64_t)); - int64_t *topColSum = new int64_t[width]; - int64_t *topColSumSq = new int64_t[width]; - memset(topColSum, 0, width * sizeof(int64_t)); - memset(topColSumSq, 0, width * sizeof(int64_t)); - - for (uint32_t y = 0; y < height; y++) + else if (x == 0 && y > 0) { - for (uint32_t x = 0; x < width; x++) + if (y < height - winLens) { - const Pel pxlY = picY.buf[x]; - int64_t sum = 0; - int64_t sumSq = 0; - uint32_t numPixInPart = 0; - - uint32_t y1 = std::max((int)(y - winLens), 0); - uint32_t y2 = std::min((int)(y + winLens), (height - 1)); - uint32_t x1 = std::max((int)(x - winLens), 0); - uint32_t x2 = std::min((int)(x + winLens), (width - 1)); - - - uint32_t bx = 0, by = 0; - const Pel *pWinY = &picY.buf[0]; - numPixInPart = (x2 - x1 + 1) * (y2 - y1 + 1); - - if (x == 0 && y == 0) // for the 1st Pixel, calc all points + pWinY += winLens*stride; + topRowSum[y + 
winLens] = 0; + topRowSumSq[y + winLens] = 0; + for (bx = x1; bx <= x2; bx++) { - for (by = y1; by <= y2; by++) - { - for (bx = x1; bx <= x2; bx++) - { - tempSq = pWinY[bx] * pWinY[bx]; - leftSum += pWinY[bx]; - leftSumSq += tempSq; - leftColSum[bx] += pWinY[bx]; - leftColSumSq[bx] += tempSq; - topColSum[bx] += pWinY[bx]; - topColSumSq[bx] += tempSq; - topRowSum[by] += pWinY[bx]; - topRowSumSq[by] += tempSq; - } - pWinY += stride; - } - topSum = leftSum; - topSumSq = leftSumSq; - sum = leftSum; - sumSq = leftSumSq; + topRowSum[y + winLens] += pWinY[bx]; + topRowSumSq[y + winLens] += pWinY[bx] * pWinY[bx]; } - else if (x == 0 && y > 0) // for the 1st column, calc the bottom stripe + topSum += topRowSum[y + winLens]; + topSumSq += topRowSumSq[y + winLens]; + } + if (y > winLens) + { + topSum -= topRowSum[y - 1 - winLens]; + topSumSq -= topRowSumSq[y - 1 - winLens]; + } + memset(leftColSum, 0, width * sizeof(int64_t)); + memset(leftColSumSq, 0, width * sizeof(int64_t)); + pWinY = &picY.buf[0]; + pWinY -= (y <= winLens ? y : winLens)*stride; + for (by = y1; by <= y2; by++) + { + for (bx = x1; bx <= x2; bx++) { - if (y < height - winLens) - { - pWinY += winLens*stride; - topRowSum[y + winLens] = 0; - topRowSumSq[y + winLens] = 0; - for (bx = x1; bx <= x2; bx++) - { - topRowSum[y + winLens] += pWinY[bx]; - topRowSumSq[y + winLens] += pWinY[bx] * pWinY[bx]; - } - topSum += topRowSum[y + winLens]; - topSumSq += topRowSumSq[y + winLens]; - } - if (y > winLens) - { - topSum -= topRowSum[y - 1 - winLens]; - topSumSq -= topRowSumSq[y - 1 - winLens]; - } - - memset(leftColSum, 0, width * sizeof(int64_t)); - memset(leftColSumSq, 0, width * sizeof(int64_t)); - pWinY = &picY.buf[0]; - pWinY -= (y <= winLens ? 
y : winLens)*stride; + leftColSum[bx] += pWinY[bx]; + leftColSumSq[bx] += pWinY[bx] * pWinY[bx]; + } + pWinY += stride; + } + leftSum = topSum; + leftSumSq = topSumSq; + sum = topSum; + sumSq = topSumSq; + } + else if (x > 0) + { + if (x < width - winLens) + { + pWinY -= (y <= winLens ? y : winLens)*stride; + if (y == 0) + { + leftColSum[x + winLens] = 0; + leftColSumSq[x + winLens] = 0; for (by = y1; by <= y2; by++) { - for (bx = x1; bx <= x2; bx++) - { - leftColSum[bx] += pWinY[bx]; - leftColSumSq[bx] += pWinY[bx] * pWinY[bx]; - } + leftColSum[x + winLens] += pWinY[x + winLens]; + leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens]; pWinY += stride; } - - leftSum = topSum; - leftSumSq = topSumSq; - sum = topSum; - sumSq = topSumSq; } - - else if (x > 0) + else { - if (x < width - winLens) + leftColSum[x + winLens] = topColSum[x + winLens]; + leftColSumSq[x + winLens] = topColSumSq[x + winLens]; + if (y < height - winLens) { - pWinY -= (y <= winLens ? y : winLens)*stride; - if (y == 0) // for the 1st row, calc the right stripe - { - leftColSum[x + winLens] = 0; - leftColSumSq[x + winLens] = 0; - for (by = y1; by <= y2; by++) - { - leftColSum[x + winLens] += pWinY[x + winLens]; - leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens]; - pWinY += stride; - } - } - else // for the main area, calc the B-R point - { - leftColSum[x + winLens] = topColSum[x + winLens]; - leftColSumSq[x + winLens] = topColSumSq[x + winLens]; - if (y < height - winLens) - { - pWinY = &picY.buf[0]; - pWinY += winLens * stride; - leftColSum[x + winLens] += pWinY[x + winLens]; - leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens]; - } - if (y > winLens) - { - pWinY = &picY.buf[0]; - pWinY -= (winLens + 1) * stride; - leftColSum[x + winLens] -= pWinY[x + winLens]; - leftColSumSq[x + winLens] -= pWinY[x + winLens] * pWinY[x + winLens]; - } - } - topColSum[x + winLens] = leftColSum[x + winLens]; - topColSumSq[x + winLens] = leftColSumSq[x + 
winLens]; - leftSum += leftColSum[x + winLens]; - leftSumSq += leftColSumSq[x + winLens]; + pWinY = &picY.buf[0]; + pWinY += winLens * stride; + leftColSum[x + winLens] += pWinY[x + winLens]; + leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens]; } - if (x > winLens) + if (y > winLens) { - leftSum -= leftColSum[x - 1 - winLens]; - leftSumSq -= leftColSumSq[x - 1 - winLens]; + pWinY = &picY.buf[0]; + pWinY -= (winLens + 1) * stride; + leftColSum[x + winLens] -= pWinY[x + winLens]; + leftColSumSq[x + winLens] -= pWinY[x + winLens] * pWinY[x + winLens]; } - sum = leftSum; - sumSq = leftSumSq; - } - - double average = double(sum) / numPixInPart; - double variance = double(sumSq) / numPixInPart - average * average; - uint32_t binNum = (uint32_t)(pxlY/PIC_ANALYZE_CW_BINS); - - if (m_lumaBD > 10) - { - average = average / (double)(1<<(m_lumaBD - 10)); - variance = variance / (double)(1 << (2*m_lumaBD - 20)); - binNum = (uint32_t)((pxlY>>(m_lumaBD - 10)) / PIC_ANALYZE_CW_BINS); - } - else if (m_lumaBD < 10) - { - average = average * (double)(1 << (10 - m_lumaBD)); - variance = variance * (double)(1 << (20-2*m_lumaBD)); - binNum = (uint32_t)((pxlY << (10 - m_lumaBD)) / PIC_ANALYZE_CW_BINS); } - double varLog10 = log10(variance + 1.0); - blockBinVarSum[binNum] += varLog10; - bockBinCnt[binNum]++; + topColSum[x + winLens] = leftColSum[x + winLens]; + topColSumSq[x + winLens] = leftColSumSq[x + winLens]; + leftSum += leftColSum[x + winLens]; + leftSumSq += leftColSumSq[x + winLens]; + } + if (x > winLens) + { + leftSum -= leftColSum[x - 1 - winLens]; + leftSumSq -= leftColSumSq[x - 1 - winLens]; } - picY.buf += stride; + sum = leftSum; + sumSq = leftSumSq; } - delete[] topColSum; - delete[] topColSumSq; - delete[] topRowSum; - delete[] topRowSumSq; - delete[] leftColSum; - delete[] leftColSumSq; - - for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++) + double average = double(sum) / numPixInPart; + double variance = double(sumSq) / numPixInPart - average * average; 
+ int binLen = m_reshapeLUTSize / m_binNum; + uint32_t binIdx = (uint32_t)(pxlY / binLen); + if (m_lumaBD > 10) + { + average = average / (double)(1 << (m_lumaBD - 10)); + variance = variance / (double)(1 << (2 * m_lumaBD - 20)); + } + else if (m_lumaBD < 10) { - if (bockBinCnt[b] > 0) - blockBinVarSum[b] = blockBinVarSum[b] / bockBinCnt[b]; + average = average * (double)(1 << (10 - m_lumaBD)); + variance = variance * (double)(1 << (20 - 2 * m_lumaBD)); } + double varLog10 = log10(variance + 1.0); + stats.binVar[binIdx] += varLog10; + binCnt[binIdx]++; + } + picY.buf += stride; + } + + for (int b = 0; b < m_binNum; b++) + { + stats.binHist[b] = (double)binCnt[b] / (double)(m_reshapeCW.rspPicSize); + stats.binVar[b] = (binCnt[b] > 0) ? (stats.binVar[b] / binCnt[b]) : 0.0; + } + delete[] binCnt; + delete[] topColSum; + delete[] topColSumSq; + delete[] topRowSum; + delete[] topRowSumSq; + delete[] leftColSum; + delete[] leftColSumSq; + + stats.minBinVar = 5.0; + stats.maxBinVar = 0.0; + stats.meanBinVar = 0.0; + stats.nonZeroCnt = 0; + for (int b = 0; b < m_binNum; b++) + { + if (stats.binHist[b] > 0.001) + { + stats.nonZeroCnt++; + stats.meanBinVar += stats.binVar[b]; + if (stats.binVar[b] > stats.maxBinVar) { stats.maxBinVar = stats.binVar[b]; } + if (stats.binVar[b] < stats.minBinVar) { stats.minBinVar = stats.binVar[b]; } + } + } + stats.meanBinVar /= (double)stats.nonZeroCnt; + for (int b = 0; b < m_binNum; b++) + { + if (stats.meanBinVar > 0.0) + stats.normVar[b] = stats.binVar[b] / stats.meanBinVar; + stats.weightVar += stats.binHist[b] * stats.binVar[b]; + stats.weightNorm += stats.binHist[b] * stats.normVar[b]; + } + + picY = pcPic->getOrigBuf(COMPONENT_Y); + PelBuf picU = pcPic->getOrigBuf(COMPONENT_Cb); + PelBuf picV = pcPic->getOrigBuf(COMPONENT_Cr); + const int widthC = picU.width; + const int heightC = picU.height; + const int strideC = picU.stride; + double avgY = 0.0, avgU = 0.0, avgV = 0.0; + double varY = 0.0, varU = 0.0, varV = 0.0; + for (int y = 
0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + avgY += picY.buf[x]; + varY += picY.buf[x] * picY.buf[x]; + } + picY.buf += stride; + } + for (int y = 0; y < heightC; y++) + { + for (int x = 0; x < widthC; x++) + { + avgU += picU.buf[x]; + avgV += picV.buf[x]; + varU += picU.buf[x] * picU.buf[x]; + varV += picV.buf[x] * picV.buf[x]; + } + picU.buf += strideC; + picV.buf += strideC; + } + avgY = avgY / (width * height); + avgU = avgU / (widthC * heightC); + avgV = avgV / (widthC * heightC); + varY = varY / (width * height) - avgY * avgY; + varU = varU / (widthC * heightC) - avgU * avgU; + varV = varV / (widthC * heightC) - avgV * avgV; + if (varY > 0) + { + stats.ratioStdU = sqrt(varU) / sqrt(varY); + stats.ratioStdV = sqrt(varV) / sqrt(varY); + } +} +void EncReshape::preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, const SliceType sliceType, const ReshapeCW& reshapeCW) +{ + m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true; + m_sliceReshapeInfo.sliceReshaperEnableFlag = true; + int modIP = pcPic->getPOC() - pcPic->getPOC() / reshapeCW.rspFpsToIp * reshapeCW.rspFpsToIp; + if (sliceType == I_SLICE || (reshapeCW.updateCtrl == 2 && modIP == 0)) + { + if (m_sliceReshapeInfo.sliceReshaperModelPresentFlag == true) + { + m_reshapeCW = reshapeCW; + m_binNum = PIC_CODE_CW_BINS; + int stdMin = 16 << (m_lumaBD - 8); + int stdMax = 235 << (m_lumaBD - 8); + int binLen = m_reshapeLUTSize / m_binNum; + int startBinIdx = stdMin / binLen; + int endBinIdx = stdMax / binLen; + m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx; + m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx; + m_initCWAnalyze = m_lumaBD > 10 ? (binLen >> (m_lumaBD - 10)) : m_lumaBD < 10 ? 
(binLen << (10 - m_lumaBD)) : binLen; + for (int b = 0; b < m_binNum; b++) { m_binCW[b] = m_initCWAnalyze; } m_reshape = true; - m_exceedSTD = false; m_useAdpCW = false; + m_exceedSTD = false; m_chromaWeight = 1.0; m_sliceReshapeInfo.enableChromaAdj = 1; - m_rateAdpMode = 0; - m_tcase = 0; - bool intraAdp = false; - bool interAdp = true; - double reshapeTH1 = 0.0; - double reshapeTH2 = 5.0; - deriveReshapeParametersSDRfromStats(bockBinCnt, blockBinVarSum, &reshapeTH1, &reshapeTH2, &intraAdp, &interAdp); + m_rateAdpMode = 0; m_tcase = 0; + bool intraAdp = true, interAdp = true; - if (m_rateAdpMode == 2 && reshapeCW.rspBaseQP <= 22) + calcSeqStats(pcPic, m_srcSeqStats); + if (m_binNum == PIC_CODE_CW_BINS) { - intraAdp = false; - interAdp = false; + if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.005) { m_exceedSTD = true; } + if (m_srcSeqStats.binHist[m_binNum - 1] > 0.0003) { intraAdp = false; interAdp = false; } + if (m_srcSeqStats.binHist[0] > 0.03) { intraAdp = false; interAdp = false; } } - - m_sliceReshapeInfo.sliceReshaperEnableFlag = intraAdp; - - if (!intraAdp && !interAdp) + else if (m_binNum == PIC_ANALYZE_CW_BINS) { - m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; - m_reshape = false; - return; + if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[1] + m_srcSeqStats.binHist[m_binNum - 2] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.01) { m_exceedSTD = true; } + if ((m_srcSeqStats.binHist[m_binNum - 2] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.0003) { intraAdp = false; interAdp = false; } + if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[1]) > 0.03) { intraAdp = false; interAdp = false; } } - if (m_exceedSTD) { - startBinIdx = 2; - endBinIdx = 29; - for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++) + for (int i = 0; i < m_binNum; i++) { - if (bockBinCnt[b] > 0 && b < startBinIdx) - startBinIdx = b; - if (bockBinCnt[b] > 0 && b > endBinIdx) - endBinIdx = b; + if (m_srcSeqStats.binHist[i] > 0 && i < startBinIdx) { 
startBinIdx = i; } + if (m_srcSeqStats.binHist[i] > 0 && i > endBinIdx) { endBinIdx = i; } } m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx; m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx; } - m_initCWAnalyze = m_lumaBD > 10 ? (m_initCWAnalyze >> (m_lumaBD - 10)) : m_lumaBD < 10 ? (m_initCWAnalyze << (10 - m_lumaBD)) : m_initCWAnalyze; - if (reshapeCW.rspBaseQP <= 22 && m_rateAdpMode == 1) + if ((m_srcSeqStats.ratioStdU + m_srcSeqStats.ratioStdV) > 1.5 && m_srcSeqStats.binHist[1] > 0.5) { intraAdp = false; interAdp = false; } + if (m_srcSeqStats.ratioStdU > 0.36 && m_srcSeqStats.ratioStdV > 0.2 && m_reshapeCW.rspPicSize > 5184000) + { + m_sliceReshapeInfo.enableChromaAdj = 0; m_chromaWeight = 1.05; + if ((m_srcSeqStats.ratioStdU + m_srcSeqStats.ratioStdV) < 0.69) { m_chromaWeight = 0.95; } + } + + if (interAdp) { - for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) + if (m_reshapeCW.adpOption) { - if (i >= startBinIdx && i <= endBinIdx) - m_binCW[i] = m_initCWAnalyze + 1; + m_reshapeCW.binCW[0] = 0; m_reshapeCW.binCW[1] = m_reshapeCW.initialCW; + m_rateAdpMode = m_reshapeCW.adpOption - 2 * (m_reshapeCW.adpOption / 2); + if (m_reshapeCW.adpOption == 2) { m_tcase = 9; } + else if (m_reshapeCW.adpOption > 2) { intraAdp = false; } + } + else if (signalType == RESHAPE_SIGNAL_SDR) + { + m_reshapeCW.binCW[0] = 0; m_reshapeCW.binCW[1] = 1022; + deriveReshapeParametersSDR(&intraAdp, &interAdp); + } + else if (signalType == RESHAPE_SIGNAL_HLG) + { + if (m_reshapeCW.updateCtrl == 0) + { + m_rateAdpMode = 0; m_tcase = 9; + m_reshapeCW.binCW[1] = 952; + if (m_srcSeqStats.meanBinVar < 2.5) { m_reshapeCW.binCW[1] = 840; } + } else - m_binCW[i] = 0; + { + m_useAdpCW = true; + m_rateAdpMode = 2; + if (m_binNum == PIC_CODE_CW_BINS) { m_reshapeCW.binCW[0] = 72; m_reshapeCW.binCW[1] = 58; } + else if (m_binNum == PIC_ANALYZE_CW_BINS) { m_reshapeCW.binCW[0] = 36; m_reshapeCW.binCW[1] = 30; } + if (m_srcSeqStats.meanBinVar < 2.5) { intraAdp = false; interAdp = false; } + } } 
} - else if (m_useAdpCW) + + if (m_rateAdpMode == 2 && reshapeCW.rspBaseQP <= 22) { intraAdp = false; interAdp = false; } + m_sliceReshapeInfo.sliceReshaperEnableFlag = intraAdp; + if (!intraAdp && !interAdp) { - double Alpha = 1.0, Beta = 0.0; - deriveReshapeParameters(blockBinVarSum, startBinIdx, endBinIdx, m_reshapeCW, Alpha, Beta); - for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) + m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; + m_reshape = false; + return; + } + + if (m_rateAdpMode == 1 && reshapeCW.rspBaseQP <= 22) + { + for (int i = 0; i < m_binNum; i++) { - if (i >= startBinIdx && i <= endBinIdx) - m_binCW[i] = (uint32_t)round(Alpha*blockBinVarSum[i] + Beta); - else - m_binCW[i] = 0; + if (i >= startBinIdx && i <= endBinIdx) { m_binCW[i] = m_initCWAnalyze + 2; } + else { m_binCW[i] = 0; } } } - else + else if (m_useAdpCW) { - for (int b = startBinIdx; b <= endBinIdx; b++) + if (signalType == RESHAPE_SIGNAL_SDR && m_reshapeCW.updateCtrl == 2) { - if (blockBinVarSum[b] < reshapeTH1) - m_binImportance[b] = 2; - else if (blockBinVarSum[b] > reshapeTH2) - m_binImportance[b] = 3; - else - m_binImportance[b] = 1; + m_binNum = PIC_ANALYZE_CW_BINS; + startBinIdx = startBinIdx * 2; + endBinIdx = endBinIdx * 2 + 1; + calcSeqStats(pcPic, m_srcSeqStats); } - - for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) + double alpha = 1.0, beta = 0.0; + deriveReshapeParameters(m_srcSeqStats.binVar, startBinIdx, endBinIdx, m_reshapeCW, alpha, beta); + for (int i = 0; i < m_binNum; i++) { - if (m_binImportance[i] == 0) - m_binCW[i] = 0; - else if (m_binImportance[i] == 1) - m_binCW[i] = m_initCWAnalyze + 1; - else if (m_binImportance[i] == 2) - m_binCW[i] = m_reshapeCW.binCW[0]; - else if (m_binImportance[i] == 3) - m_binCW[i] = m_reshapeCW.binCW[1]; - else - THROW("SDR Reshape Bin Importance not supported"); + if (i >= startBinIdx && i <= endBinIdx) { m_binCW[i] = (uint32_t)round(alpha*m_srcSeqStats.binVar[i] + beta); } + else { m_binCW[i] = 0; } } } - if 
(m_reshapeCW.rspPicSize <= 1497600 && reshapeCW.rspIntraPeriod == -1 && modIP == 0 && sliceType != I_SLICE) + else { - m_sliceReshapeInfo.sliceReshaperEnableFlag = false; + cwPerturbation(startBinIdx, endBinIdx, (uint16_t)m_reshapeCW.binCW[1]); } - + cwReduction(startBinIdx, endBinIdx); } m_chromaAdj = m_sliceReshapeInfo.enableChromaAdj; - if (sliceType == I_SLICE && isDualT) - { - m_sliceReshapeInfo.enableChromaAdj = 0; - } } else // Inter slices { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; m_sliceReshapeInfo.enableChromaAdj = m_chromaAdj; - - if (!m_reshape) - { - m_sliceReshapeInfo.sliceReshaperEnableFlag = false; - } + if (!m_reshape) { m_sliceReshapeInfo.sliceReshaperEnableFlag = false; } else { const int cTid = m_reshapeCW.rspTid; @@ -505,482 +577,303 @@ void EncReshape::bubbleSortDsd(double* array, int * idx, int n) } } -void EncReshape::deriveReshapeParametersSDRfromStats(uint32_t * blockBinCnt, double *blockBinVarSum, double* reshapeTH1, double* reshapeTH2, bool *intraAdp, bool *interAdp) +void EncReshape::cwPerturbation(int startBinIdx, int endBinIdx, uint16_t maxCW) +{ + for (int i = 0; i < m_binNum; i++) + { + if (i >= startBinIdx && i <= endBinIdx) { m_binCW[i] = (uint32_t)round((double)maxCW / (endBinIdx - startBinIdx + 1)); } + else { m_binCW[i] = 0; } + } + + double hist = 0.0; + uint16_t delta1 = 0, delta2 = 0; + for (int i = 0; i < m_binNum; i++) + { + if (m_srcSeqStats.binHist[i] > 0.001) + { + hist = m_srcSeqStats.binHist[i] > 0.4 ? 
0.4 : m_srcSeqStats.binHist[i]; + delta1 = (uint16_t)(10.0 * hist + 0.5); + delta2 = (uint16_t)(20.0 * hist + 0.5); + if (m_srcSeqStats.normVar[i] < 0.8) { m_binCW[i] = m_binCW[i] + delta2; } + else if (m_srcSeqStats.normVar[i] < 0.9) { m_binCW[i] = m_binCW[i] + delta1; } + if (m_srcSeqStats.normVar[i] > 1.2) { m_binCW[i] = m_binCW[i] - delta2; } + else if (m_srcSeqStats.normVar[i] > 1.1) { m_binCW[i] = m_binCW[i] - delta1; } + } + } +} +void EncReshape::cwReduction(int startBinIdx, int endBinIdx) +{ + int bdShift = m_lumaBD - 10; + int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize; + int maxAllowedCW = totCW - 1, usedCW = 0; + for (int i = 0; i < m_binNum; i++) { usedCW += m_binCW[i]; } + if (usedCW > maxAllowedCW) + { + int deltaCW = usedCW - maxAllowedCW; + int divCW = deltaCW / (endBinIdx - startBinIdx + 1); + int modCW = deltaCW - divCW * (endBinIdx - startBinIdx + 1); + if (divCW > 0) + { + for (int i = startBinIdx; i <= endBinIdx; i++) { m_binCW[i] -= divCW; } + } + for (int i = startBinIdx; i <= endBinIdx; i++) + { + if (modCW == 0) break; + if (m_binCW[i] > 0) { m_binCW[i]--; modCW--; } + } + } +} +void EncReshape::deriveReshapeParametersSDR(bool *intraAdp, bool *interAdp) { - int binIdxSortDsd[PIC_ANALYZE_CW_BINS] = { 0 }; - double binVarSortDsd[PIC_ANALYZE_CW_BINS] = { 0.0 }; - double binHist[PIC_ANALYZE_CW_BINS] = { 0.0 }; - double binVarSortDsdCDF[PIC_ANALYZE_CW_BINS] = { 0.0 }; - double maxBinVar = 0.0, meanBinVar = 0.0, minBinVar = 5.0; - int nonZeroBinCt = 0; + bool isSkipCase = false; + bool isLowCase = false; int firstBinVarLessThanVal1 = 0; int firstBinVarLessThanVal2 = 0; int firstBinVarLessThanVal3 = 0; - int firstBinVarLessThanVal4 = 0; - - for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++) + double percBinVarLessThenVal1 = 0.0; + double percBinVarLessThenVal2 = 0.0; + double percBinVarLessThenVal3 = 0.0; + int *binIdxSortDsd = new int[m_binNum]; + double 
*binVarSortDsd = new double[m_binNum]; + double *binVarSortDsdCDF = new double[m_binNum]; + double ratioWeiVar = 0.0, ratioWeiVarNorm = 0.0; + int startBinIdx = m_sliceReshapeInfo.reshaperModelMinBinIdx; + int endBinIdx = m_sliceReshapeInfo.reshaperModelMaxBinIdx; + + for (int b = 0; b < m_binNum; b++) { - binHist[b] = (double) blockBinCnt[b] / (double)(m_reshapeCW.rspPicSize); - if (binHist[b] > 0.001) + binVarSortDsd[b] = m_srcSeqStats.binVar[b]; + binIdxSortDsd[b] = b; + } + bubbleSortDsd(binVarSortDsd, binIdxSortDsd, m_binNum); + binVarSortDsdCDF[0] = m_srcSeqStats.binHist[binIdxSortDsd[0]]; + for (int b = 1; b < m_binNum; b++) { binVarSortDsdCDF[b] = binVarSortDsdCDF[b - 1] + m_srcSeqStats.binHist[binIdxSortDsd[b]]; } + for (int b = 0; b < m_binNum - 1; b++) + { + if (binVarSortDsd[b] > 3.4) { firstBinVarLessThanVal1 = b + 1; } + if (binVarSortDsd[b] > 2.8) { firstBinVarLessThanVal2 = b + 1; } + if (binVarSortDsd[b] > 2.5) { firstBinVarLessThanVal3 = b + 1; } + } + percBinVarLessThenVal1 = binVarSortDsdCDF[firstBinVarLessThanVal1]; + percBinVarLessThenVal2 = binVarSortDsdCDF[firstBinVarLessThanVal2]; + percBinVarLessThenVal3 = binVarSortDsdCDF[firstBinVarLessThanVal3]; + delete[] binIdxSortDsd; + delete[] binVarSortDsd; + delete[] binVarSortDsdCDF; + + cwPerturbation(startBinIdx, endBinIdx, (uint16_t)m_reshapeCW.binCW[1]); + cwReduction(startBinIdx, endBinIdx); + initSeqStats(m_rspSeqStats); + for (int b = 0; b < m_binNum; b++) + { + double scale = (m_binCW[b] > 0) ? 
((double)m_binCW[b] / (double)m_initCWAnalyze) : 1.0; + m_rspSeqStats.binHist[b] = m_srcSeqStats.binHist[b]; + m_rspSeqStats.binVar[b] = m_srcSeqStats.binVar[b] + 2.0 * log10(scale); + } + m_rspSeqStats.minBinVar = 5.0; + m_rspSeqStats.maxBinVar = 0.0; + m_rspSeqStats.meanBinVar = 0.0; + m_rspSeqStats.nonZeroCnt = 0; + for (int b = 0; b < m_binNum; b++) + { + if (m_rspSeqStats.binHist[b] > 0.001) { - nonZeroBinCt++; - meanBinVar += blockBinVarSum[b]; - if (blockBinVarSum[b] > maxBinVar) { maxBinVar = blockBinVarSum[b]; } - if (blockBinVarSum[b] < minBinVar) { minBinVar = blockBinVarSum[b]; } + m_rspSeqStats.nonZeroCnt++; + m_rspSeqStats.meanBinVar += m_rspSeqStats.binVar[b]; + if (m_rspSeqStats.binVar[b] > m_rspSeqStats.maxBinVar) { m_rspSeqStats.maxBinVar = m_rspSeqStats.binVar[b]; } + if (m_rspSeqStats.binVar[b] < m_rspSeqStats.minBinVar) { m_rspSeqStats.minBinVar = m_rspSeqStats.binVar[b]; } } - binVarSortDsd[b] = blockBinVarSum[b]; - binIdxSortDsd[b] = b; } - if ((binHist[0] + binHist[1] + binHist[PIC_ANALYZE_CW_BINS - 2] + binHist[PIC_ANALYZE_CW_BINS - 1]) > 0.01) { m_exceedSTD = true; } - if ((binHist[PIC_ANALYZE_CW_BINS - 2] + binHist[PIC_ANALYZE_CW_BINS - 1]) > 0.01) { *interAdp = false; return; } - else { *interAdp = true; } - - meanBinVar = meanBinVar / (double)nonZeroBinCt; - bubbleSortDsd(binVarSortDsd, binIdxSortDsd, PIC_ANALYZE_CW_BINS); - binVarSortDsdCDF[0] = binHist[binIdxSortDsd[0]]; - - for (int b = 1; b < PIC_ANALYZE_CW_BINS; b++) + m_rspSeqStats.meanBinVar /= (double)m_rspSeqStats.nonZeroCnt; + for (int b = 0; b < m_binNum; b++) { - binVarSortDsdCDF[b] = binVarSortDsdCDF[b - 1] + binHist[binIdxSortDsd[b]]; + if (m_rspSeqStats.meanBinVar > 0.0) + m_rspSeqStats.normVar[b] = m_rspSeqStats.binVar[b] / m_rspSeqStats.meanBinVar; + m_rspSeqStats.weightVar += m_rspSeqStats.binHist[b] * m_rspSeqStats.binVar[b]; + m_rspSeqStats.weightNorm += m_rspSeqStats.binHist[b] * m_rspSeqStats.normVar[b]; } + ratioWeiVar = m_rspSeqStats.weightVar / 
m_srcSeqStats.weightVar; + ratioWeiVarNorm = m_rspSeqStats.weightNorm / m_srcSeqStats.weightNorm; - for (int b = 0; b < PIC_ANALYZE_CW_BINS - 1; b++) + if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.0001 && m_srcSeqStats.binHist[m_binNum - 2] < 0.001) { - if (binVarSortDsd[b] > 3.5) { firstBinVarLessThanVal1 = b + 1; } - if (binVarSortDsd[b] > 3.0) { firstBinVarLessThanVal2 = b + 1; } - if (binVarSortDsd[b] > 2.5) { firstBinVarLessThanVal3 = b + 1; } - if (binVarSortDsd[b] > 2.0) { firstBinVarLessThanVal4 = b + 1; } + if (percBinVarLessThenVal3 > 0.8 && percBinVarLessThenVal2 > 0.4 && m_srcSeqStats.binVar[m_binNum - 2] > 4.8) { isSkipCase = true; } + else if (percBinVarLessThenVal3 < 0.1 && percBinVarLessThenVal1 < 0.05 && m_srcSeqStats.binVar[m_binNum - 2] < 4.0) { isSkipCase = true; } } + if (isSkipCase) { *intraAdp = false; *interAdp = false; return; } - m_reshapeCW.binCW[0] = 38; - m_reshapeCW.binCW[1] = 28; + if (m_reshapeCW.rspPicSize > 5184000) { isLowCase = true; } + else if (m_srcSeqStats.binVar[1] > 4.0) { isLowCase = true; } + else if (m_rspSeqStats.meanBinVar > 3.4 && ratioWeiVarNorm > 1.005 && ratioWeiVar > 1.02) { isLowCase = true; } + else if (m_rspSeqStats.meanBinVar > 3.1 && ratioWeiVarNorm > 1.005 && ratioWeiVar > 1.04) { isLowCase = true; } + else if (m_rspSeqStats.meanBinVar > 2.8 && ratioWeiVarNorm > 1.01 && ratioWeiVar > 1.04) { isLowCase = true; } - if (m_reshapeCW.rspIntraPeriod == -1) + if (m_reshapeCW.updateCtrl == 0) { - *intraAdp = true; - if (m_reshapeCW.rspPicSize > 1497600) - { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.4; - *reshapeTH2 = 4.5; - m_rateAdpMode = 2; - - if (meanBinVar >= 2.52) - { - if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5) - { - *reshapeTH1 = 2.5; - *reshapeTH2 = 3.0; - } - else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02) - { - *reshapeTH1 = 2.2; - } - else if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25) - { - 
m_reshapeCW.binCW[1] = 30; - *reshapeTH1 = 2.0; - m_rateAdpMode = 0; - } - else - { - m_reshapeCW.binCW[1] = 30; - m_rateAdpMode = 1; - } - } - } - else if (m_reshapeCW.rspPicSize > 660480) + m_reshapeCW.binCW[1] = 1022; + if (isLowCase) { - m_reshapeCW.binCW[0] = 34; - *reshapeTH1 = 3.4; - *reshapeTH2 = 4.0; - m_rateAdpMode = 2; - - if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6) + *intraAdp = false; + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 980; + if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05) { - if (maxBinVar < 3.5) - { - m_useAdpCW = true; - m_reshapeCW.binCW[0] = 38; - } - else - { - m_reshapeCW.binCW[0] = 40; - *reshapeTH1 = 2.2; - *reshapeTH2 = 4.5; - m_rateAdpMode = 0; - } + m_reshapeCW.binCW[1] = 896; + if (m_srcSeqStats.binVar[m_binNum - 2] < 1.2) { m_reshapeCW.binCW[1] = 938; } } - else + else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0) { - if (maxBinVar > 3.3) - { - m_reshapeCW.binCW[1] = 30; - } - else - { - m_reshapeCW.binCW[1] = 28; - } + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 938; } } - else if (m_reshapeCW.rspPicSize > 249600) + if (m_srcSeqStats.binHist[m_binNum - 2] < 0.001) { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.5; - *reshapeTH2 = 4.5; - - if (m_exceedSTD) + if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binVar[1] > 3.0) { - m_reshapeCW.binCW[0] = 36; - m_reshapeCW.binCW[1] = 30; + *intraAdp = true; + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 784; } - if (minBinVar > 2.6) + else if (m_srcSeqStats.binHist[1] < 0.006) { - *reshapeTH1 = 3.0; + *intraAdp = false; + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 1008; } - else { - double diff1 = binVarSortDsdCDF[firstBinVarLessThanVal4] - binVarSortDsdCDF[firstBinVarLessThanVal3]; - double diff2 = binVarSortDsdCDF[firstBinVarLessThanVal2] - binVarSortDsdCDF[firstBinVarLessThanVal1]; - if (diff1 > 0.4 || binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.1) - { - m_useAdpCW = true; - m_rateAdpMode = 1; - } - else if (diff2 <= 0.1 && 
binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.99 && binVarSortDsdCDF[firstBinVarLessThanVal3] > 0.642 && binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.03) - { - m_useAdpCW = true; - m_rateAdpMode = 1; - } - else - { - m_rateAdpMode = 2; - } + else if (percBinVarLessThenVal3 < 0.5) + { + *intraAdp = true; + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 1022; } } - else + else if ((m_srcSeqStats.maxBinVar > 4.0 && m_rspSeqStats.meanBinVar > 3.2 && percBinVarLessThenVal2 < 0.25) || ratioWeiVar < 1.03) { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.6; - *reshapeTH2 = 4.5; - - if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5 && maxBinVar < 4.7) - { - *reshapeTH1 = 3.2; - m_rateAdpMode = 1; - } + *intraAdp = true; + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 1022; } + if (*intraAdp == true && m_rateAdpMode == 0) { m_tcase = 9; } } - else if (m_reshapeCW.rspIntraPeriod == 1) + else if (m_reshapeCW.updateCtrl == 1) { - *intraAdp = true; - if (m_reshapeCW.rspPicSize > 5184000) + m_reshapeCW.binCW[1] = 952; + if (isLowCase) { - *reshapeTH1 = 2.0; - *reshapeTH2 = 3.0; - m_rateAdpMode = 2; - - if (maxBinVar > 2.4) + if (m_reshapeCW.rspPicSize > 5184000) { - if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.88) - { - if (maxBinVar < 2.695) - { - *reshapeTH2 = 2.2; - } - else - { - if (binVarSortDsdCDF[firstBinVarLessThanVal3] < 0.45) - { - *reshapeTH1 = 2.5; - *reshapeTH2 = 4.0; - m_reshapeCW.binCW[0] = 36; - m_sliceReshapeInfo.enableChromaAdj = 0; - m_rateAdpMode = 0; - } - else - { - m_useAdpCW = true; - m_reshapeCW.binCW[0] = 36; - m_reshapeCW.binCW[1] = 30; - } - } - } - else - { - if (maxBinVar > 2.8) - { - *reshapeTH1 = 2.2; - *reshapeTH2 = 4.0; - m_reshapeCW.binCW[0] = 36; - m_sliceReshapeInfo.enableChromaAdj = 0; - } - else - { - m_useAdpCW = true; - m_reshapeCW.binCW[0] = 38; - m_reshapeCW.binCW[1] = 28; - } - } + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 812; } - else + if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05) { - if (maxBinVar > 2.24) - { - m_useAdpCW = 
true; - m_reshapeCW.binCW[0] = 34; - m_reshapeCW.binCW[1] = 30; - } - } - } - else if (m_reshapeCW.rspPicSize > 1497600) - { - *reshapeTH1 = 2.0; - *reshapeTH2 = 4.5; - m_rateAdpMode = 2; - - if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25) - { - int firstVarCDFLargerThanVal = 1; - for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++) - { - if (binVarSortDsdCDF[b] > 0.7) - { - firstVarCDFLargerThanVal = b; - break; - } - } - if (meanBinVar < 2.52 || binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5) - { - *reshapeTH1 = 2.2; - *reshapeTH2 = (binVarSortDsd[firstVarCDFLargerThanVal] + binVarSortDsd[firstVarCDFLargerThanVal - 1]) / 2.0; - } - else + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 812; + if (m_srcSeqStats.binHist[m_binNum - 2] > 0.1 || m_srcSeqStats.binHist[1] > 0.1) { - m_reshapeCW.binCW[1] = 30; - *reshapeTH2 = 2.8; + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 924; } } - else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02) + else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0) { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 3.5; m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 896; } - } - else if (m_reshapeCW.rspPicSize > 660480) - { - *reshapeTH1 = 2.5; - *reshapeTH2 = 4.5; - m_rateAdpMode = 1; - - if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6) + else if (percBinVarLessThenVal2 > 0.98 && m_srcSeqStats.binHist[1] > 0.05) { - if (maxBinVar < 3.5) - { - *reshapeTH1 = 2.0; - } + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 784; } - else + else if (percBinVarLessThenVal2 < 0.1) { - if (maxBinVar > 3.3) - { - m_reshapeCW.binCW[0] = 35; - } - else - { - *reshapeTH1 = 2.8; - m_reshapeCW.binCW[0] = 35; - } + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 1022; } } - else if (m_reshapeCW.rspPicSize > 249600) + if (m_srcSeqStats.binHist[1] > 0.1 && (m_srcSeqStats.binVar[1] > 1.8 && m_srcSeqStats.binVar[1] < 3.0)) { m_rateAdpMode = 1; - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.5; - *reshapeTH2 
= 4.5; + if (m_srcSeqStats.binVar[m_binNum - 2] > 1.2 && m_srcSeqStats.binVar[m_binNum - 2] < 4.0) { m_reshapeCW.binCW[1] = 784; } } - else + else if (m_srcSeqStats.binHist[m_binNum - 2] < 0.001) { - if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.33 && m_reshapeCW.rspFps>40) + if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binVar[1] > 3.0) { - *intraAdp = false; - *interAdp = false; + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 784; } - else + else if (m_srcSeqStats.binHist[1] < 0.006) { - m_rateAdpMode = 1; - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 3.0; - *reshapeTH2 = 4.0; + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 980; + } + else if (percBinVarLessThenVal3 < 0.5) + { + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 924; } } + else if ((m_srcSeqStats.maxBinVar > 4.0 && m_rspSeqStats.meanBinVar > 3.2 && percBinVarLessThenVal2 < 0.25) || ratioWeiVar < 1.03) + { + m_rateAdpMode = 0; + m_reshapeCW.binCW[1] = 980; + } } else { - if (m_reshapeCW.rspPicSize > 5184000) + m_useAdpCW = true; + m_reshapeCW.binCW[0] = 36; m_reshapeCW.binCW[1] = 30; + if (isLowCase) { - m_reshapeCW.binCW[0] = 40; - *reshapeTH2 = 4.0; - m_rateAdpMode = 2; - - if (maxBinVar < 2.4) - { - *reshapeTH1 = 3.0; - if (m_reshapeCW.rspBaseQP <= 22) - m_tcase = 3; - } - else if (maxBinVar > 3.0) + if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05) { - if (minBinVar > 1) - { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.8; - *reshapeTH2 = 3.5; - m_sliceReshapeInfo.enableChromaAdj = 0; - m_chromaWeight = 1.05; - m_rateAdpMode = 0; - } - else - { - m_reshapeCW.binCW[0] = 36; - *reshapeTH1 = 2.2; - *reshapeTH2 = 3.5; - m_sliceReshapeInfo.enableChromaAdj = 0; - m_chromaWeight = 0.95; - } - } - else - { - *reshapeTH1 = 1.5; + m_useAdpCW = false; + m_rateAdpMode = 1; + m_reshapeCW.binCW[1] = 896; + if (m_srcSeqStats.binHist[1] > 0.005) { m_rateAdpMode = 0; } } + else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0) { m_reshapeCW.binCW[1] = 28; } } - else if (m_reshapeCW.rspPicSize > 
1497600) + if (m_srcSeqStats.binHist[1] > 0.1 && m_srcSeqStats.binVar[1] > 1.8 && m_srcSeqStats.binVar[1] < 3.0) { - *reshapeTH1 = 2.5; - *reshapeTH2 = 4.5; + m_useAdpCW = false; m_rateAdpMode = 1; - - if (meanBinVar < 2.52) - { - *intraAdp = true; - m_rateAdpMode = 0; - m_tcase = 9; - } - else - { - if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5) - { - *reshapeTH2 = 3.0; - *intraAdp = true; - } - else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02) - { - *reshapeTH1 = 3.0; - *intraAdp = true; - m_rateAdpMode = 0; - m_tcase = 9; - } - else if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25) - { - *reshapeTH1 = 2.4; - m_reshapeCW.binCW[0] = 36; - } - else - { - *reshapeTH1 = 2.4; - m_reshapeCW.binCW[0] = 36; - } - } + m_reshapeCW.binCW[1] = 952; } - else if (m_reshapeCW.rspPicSize > 660480) + else if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] < 0.001 && m_srcSeqStats.binVar[1] > 3.0) { - *intraAdp = true; + m_useAdpCW = false; m_rateAdpMode = 1; - - if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6) - { - if (maxBinVar < 3.5) - { - *reshapeTH1 = 2.1; - *reshapeTH2 = 3.5; - } - else - { - *reshapeTH1 = 2.4; - *reshapeTH2 = 4.5; - m_reshapeCW.binCW[0] = 40; - m_rateAdpMode = 0; - } - } - else - { - if (maxBinVar > 3.3) - { - *reshapeTH1 = 3.5; - *reshapeTH2 = 3.8; - } - else - { - *reshapeTH1 = 3.0; - *reshapeTH2 = 4.0; - m_reshapeCW.binCW[1] = 30; - } - } + m_reshapeCW.binCW[1] = 784; } - else if (m_reshapeCW.rspPicSize > 249600) + else if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] < 0.005 && m_srcSeqStats.binVar[1] > 1.0 && m_srcSeqStats.binVar[1] < 1.5) { - m_reshapeCW.binCW[1] = 30; - *reshapeTH1 = 2.5; - *reshapeTH2 = 4.5; - *intraAdp = true; - m_rateAdpMode = 1; - - if (minBinVar > 2.6) - { - *reshapeTH1 = 3.2; - m_rateAdpMode = 0; - m_tcase = 9; - } - else { - double diff1 = binVarSortDsdCDF[firstBinVarLessThanVal4] - 
binVarSortDsdCDF[firstBinVarLessThanVal3]; - double diff2 = binVarSortDsdCDF[firstBinVarLessThanVal2] - binVarSortDsdCDF[firstBinVarLessThanVal1]; - if (diff1 > 0.4 || binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.1) - { - *reshapeTH1 = 2.9; - *intraAdp = false; - } - else - { - if (diff2 > 0.1) - { - *reshapeTH1 = 2.5; - } - else - { - *reshapeTH1 = 2.9; - if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.99 && binVarSortDsdCDF[firstBinVarLessThanVal3] > 0.642 && binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.03) - { - m_rateAdpMode = 0; - m_tcase = 9; - } - } - } - } + m_rateAdpMode = 2; + m_reshapeCW.binCW[0] = 38; } - else + else if (m_srcSeqStats.binHist[1] < 0.005 && m_srcSeqStats.binHist[m_binNum - 2] > 0.05 && m_srcSeqStats.binVar[m_binNum - 2] > 1.0 && m_srcSeqStats.binVar[m_binNum - 2] < 1.5) { + m_rateAdpMode = 2; m_reshapeCW.binCW[0] = 36; - m_reshapeCW.binCW[1] = 30; - *reshapeTH1 = 2.6; - *reshapeTH2 = 4.5; - *intraAdp = true; + } + else if (m_srcSeqStats.binHist[1] > 0.02 && m_srcSeqStats.binHist[m_binNum - 2] > 0.04 && m_srcSeqStats.binVar[1] < 2.0 && m_srcSeqStats.binVar[m_binNum - 2] < 1.5) + { + m_rateAdpMode = 2; + m_reshapeCW.binCW[0] = 34; + } + else if ((m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] > 0.2 && m_srcSeqStats.binVar[1] > 3.0 && m_srcSeqStats.binVar[1] < 4.0) || ratioWeiVar < 1.03) + { m_rateAdpMode = 1; - if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5 && maxBinVar < 4.7) - { - *reshapeTH1 = 3.4; - } + m_reshapeCW.binCW[0] = 34; + } + else if (m_srcSeqStats.binVar[1] < 4.0 && percBinVarLessThenVal2 == 1.0 && percBinVarLessThenVal3 == 1.0) + { + m_rateAdpMode = 0; + m_reshapeCW.binCW[0] = 34; } + if (m_useAdpCW && !isLowCase) { m_reshapeCW.binCW[1] = 66 - m_reshapeCW.binCW[0]; } } } @@ -1042,6 +935,12 @@ void EncReshape::initLUTfromdQPModel() { m_binCW[i] = m_reshapePivot[i + 1] - m_reshapePivot[i]; } + for (int i = 0; i <= PIC_CODE_CW_BINS; i++) + { + m_inputPivot[i] = m_initCW * i; + } + + 
adjustLmcsPivot(); int maxAbsDeltaCW = 0, absDeltaCW = 0, deltaCW = 0; for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++) @@ -1055,127 +954,50 @@ void EncReshape::initLUTfromdQPModel() for (int i = 0; i < pwlFwdLUTsize; i++) { - int16_t Y1 = m_reshapePivot[i]; - int16_t Y2 = m_reshapePivot[i + 1]; - m_fwdLUT[i*pwlFwdBinLen] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1); - int log2PwlFwdBinLen = floorLog2(pwlFwdBinLen); - int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2PwlFwdBinLen - 1))) >> (log2PwlFwdBinLen); - for (int j = 1; j < pwlFwdBinLen; j++) + m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (floorLog2(pwlFwdBinLen) - 1))) >> floorLog2(pwlFwdBinLen); + if (m_binCW[i] == 0) { - int tempVal = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC); - m_fwdLUT[i*pwlFwdBinLen + j] = Clip3((Pel)0, (Pel)((1<<m_lumaBD) -1), (Pel)tempVal); + m_invScaleCoef[i] = 0; + m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC; + } + else + { + m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]); + m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset)); } } - reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize); - updateChromaScaleLUT(); + for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++) + { + int idxY = lumaSample / m_initCW; + int tempVal = m_reshapePivot[idxY] + ((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal)); + + int idxYInv = getPWLIdxInv(lumaSample); + int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample)); + } } -void EncReshape::constructReshaperSDR() +void 
EncReshape::constructReshaperLMCS() { int bdShift = m_lumaBD - 10; - int usedCW = 0; - int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1<<bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize; - int histBins = PIC_ANALYZE_CW_BINS; - int histLenth = totCW/histBins; + int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize; + int histLenth = totCW / m_binNum; int log2HistLenth = floorLog2(histLenth); - int16_t *tempFwdLUT = new int16_t[m_reshapeLUTSize + 1](); - int i, j; - int cwScaleBins1, cwScaleBins2; - int maxAllowedCW = totCW-1; + int i; - cwScaleBins1 = m_reshapeCW.binCW[0]; - cwScaleBins2 = m_reshapeCW.binCW[1]; - - for (i = 0; i < histBins; i++) - usedCW += m_binCW[i]; - - if (usedCW > maxAllowedCW) + if (m_binNum == PIC_ANALYZE_CW_BINS) { - int cnt0 = 0, cnt1 = 0, cnt2 = 0; - for (i = 0; i < histBins; i++) - { - if (m_binCW[i] == histLenth + 1) cnt0++; - else if (m_binCW[i] == cwScaleBins1) cnt1++; - else if (m_binCW[i] == cwScaleBins2) cnt2++; - } - - int resCW = usedCW - maxAllowedCW; - int cwReduce1 = (cwScaleBins1 - histLenth - 1) * cnt1; - int cwReduce2 = (histLenth + 1 - cwScaleBins2) * cnt0; - - if (resCW <= cwReduce1) - { - int idx = 0; - while (resCW > 0) - { - if (m_binCW[idx] > (histLenth + 1)) - { - m_binCW[idx]--; - resCW--; - } - idx++; - if (idx == histBins) - idx = 0; - } - } - else if (resCW > cwReduce1 && resCW <= (cwReduce1 + cwReduce2)) - { - resCW -= cwReduce1; - int idx = 0; - while (resCW > 0) - { - if (m_binCW[idx] > cwScaleBins2 && m_binCW[idx] < cwScaleBins1) - { - m_binCW[idx]--; - resCW--; - } - idx++; - if (idx == histBins) - idx = 0; - } - for (i = 0; i < histBins; i++) - { - if (m_binCW[i] == cwScaleBins1) - m_binCW[i] = histLenth + 1; - } - } - else if (resCW > (cwReduce1 + cwReduce2)) + for (int i = 0; i < PIC_CODE_CW_BINS; i++) { - resCW -= (cwReduce1 + cwReduce2); - int idx = 0; - while (resCW > 0) - { - if 
(m_binCW[idx] > 0 && m_binCW[idx] < (histLenth + 1)) - { - m_binCW[idx]--; - resCW--; - } - idx++; - if (idx == histBins) - idx = 0; - } - for (i = 0; i < histBins; i++) - { - if (m_binCW[i] == histLenth + 1) - m_binCW[i] = cwScaleBins2; - if (m_binCW[i] == cwScaleBins1) - m_binCW[i] = histLenth + 1; - } + m_binCW[i] = m_binCW[2 * i] + m_binCW[2 * i + 1]; } } - - if (bdShift != 0) + for (int i = 0; i <= PIC_CODE_CW_BINS; i++) { - for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) - { - m_binCW[i] = bdShift > 0 ? m_binCW[i] * (1 << bdShift) : m_binCW[i] / (1 << (-bdShift)); - } + m_inputPivot[i] = m_initCW * i; } - for (int i = 0; i < PIC_CODE_CW_BINS; i++) - { - m_binCW[i] = m_binCW[2 * i] + m_binCW[2 * i + 1]; - } m_sliceReshapeInfo.reshaperModelMinBinIdx = 0; m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1; for (int i = 0; i < PIC_CODE_CW_BINS; i++) @@ -1195,13 +1017,23 @@ void EncReshape::constructReshaperSDR() } } + if (bdShift != 0) + { + for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++) + { + m_binCW[i] = bdShift > 0 ? m_binCW[i] * (1 << bdShift) : m_binCW[i] / (1 << (-bdShift)); + } + } + + adjustLmcsPivot(); + int maxAbsDeltaCW = 0, absDeltaCW = 0, deltaCW = 0; for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++) { deltaCW = (int)m_binCW[i] - (int)m_initCW; m_sliceReshapeInfo.reshaperModelBinCWDelta[i] = deltaCW; absDeltaCW = (deltaCW < 0) ? 
(-deltaCW) : deltaCW; - if (absDeltaCW > maxAbsDeltaCW) { maxAbsDeltaCW = absDeltaCW; } + if (absDeltaCW > maxAbsDeltaCW) { maxAbsDeltaCW = absDeltaCW; } } m_sliceReshapeInfo.maxNbitsNeededDeltaCW = std::max(1, 1 + floorLog2(maxAbsDeltaCW)); @@ -1209,40 +1041,109 @@ void EncReshape::constructReshaperSDR() log2HistLenth = floorLog2(histLenth); int sumBins = 0; - for (i = 0; i < PIC_CODE_CW_BINS; i++) { sumBins += m_binCW[i]; } + for (i = 0; i < PIC_CODE_CW_BINS; i++) { sumBins += m_binCW[i]; } CHECK(sumBins >= m_reshapeLUTSize, "SDR CW assignment is wrong!!"); - memset(tempFwdLUT, 0, (m_reshapeLUTSize + 1) * sizeof(int16_t)); - tempFwdLUT[0] = 0; - - for (i = 0; i < PIC_CODE_CW_BINS; i++) + for (int i = 0; i < PIC_CODE_CW_BINS; i++) { - tempFwdLUT[(i + 1)*histLenth] = tempFwdLUT[i*histLenth] + m_binCW[i]; - int16_t Y1 = tempFwdLUT[i*histLenth]; - int16_t Y2 = tempFwdLUT[(i + 1)*histLenth]; - m_reshapePivot[i + 1] = Y2; - int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2HistLenth - 1))) >> (log2HistLenth); - m_fwdLUT[i*histLenth] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1); - for (j = 1; j < histLenth; j++) + m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i]; + m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (log2HistLenth - 1))) >> log2HistLenth; + if (m_binCW[i] == 0) + { + m_invScaleCoef[i] = 0; + m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC; + } + else { - tempFwdLUT[i*histLenth + j] = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC); - m_fwdLUT[i*histLenth + j] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)tempFwdLUT[i*histLenth + j]); + m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]); + m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset)); } } + for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++) + { + int idxY = lumaSample / m_initCW; + int tempVal = m_reshapePivot[idxY] + 
((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal)); + int idxYInv = getPWLIdxInv(lumaSample); + int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC); + m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample)); + } for (i = 0; i < PIC_CODE_CW_BINS; i++) { int start = i*histLenth; int end = (i + 1)*histLenth - 1; m_cwLumaWeight[i] = m_fwdLUT[end] - m_fwdLUT[start]; } +} + +void EncReshape::adjustLmcsPivot() +{ + int bdShift = m_lumaBD - 10; + int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize; + int orgCW = totCW / PIC_CODE_CW_BINS; + int log2SegSize = m_lumaBD - floorLog2(LMCS_SEG_NUM); + + m_reshapePivot[0] = 0; + for (int i = 0; i < PIC_CODE_CW_BINS; i++) + { + m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i]; + } + int segIdxMax = (m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1] >> log2SegSize); + for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++) + { + m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i]; + int segIdxCurr = (m_reshapePivot[i] >> log2SegSize); + int segIdxNext = (m_reshapePivot[i + 1] >> log2SegSize); - if (tempFwdLUT != nullptr) { delete[] tempFwdLUT; tempFwdLUT = nullptr; } + if ((segIdxCurr == segIdxNext) && (m_reshapePivot[i] != (segIdxCurr << log2SegSize))) + { + if (segIdxCurr == segIdxMax) + { + m_reshapePivot[i] = m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1]; + for (int j = i; j <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; j++) + { + m_reshapePivot[j + 1] = m_reshapePivot[i]; + m_binCW[j] = 0; + } + m_binCW[i - 1] = m_reshapePivot[i] - m_reshapePivot[i - 1]; + break; + } + else + { + int16_t adjustVal = ((segIdxCurr + 1) 
<< log2SegSize) - m_reshapePivot[i + 1]; + m_reshapePivot[i + 1] += adjustVal; + m_binCW[i] += adjustVal; + + for (int j = i + 1; j <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; j++) + { + if (m_binCW[j] < (adjustVal + (orgCW >> 3))) + { + adjustVal -= (m_binCW[j] - (orgCW >> 3)); + m_binCW[j] = (orgCW >> 3); + } + else + { + m_binCW[j] -= adjustVal; + adjustVal = 0; + } + if (adjustVal == 0) + break; + } + } + } + } - reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize); - updateChromaScaleLUT(); + for (int i = PIC_CODE_CW_BINS - 1; i >= 0; i--) + { + if (m_binCW[i] > 0) + { + m_sliceReshapeInfo.reshaperModelMaxBinIdx = i; + break; + } + } } -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM void EncReshape::copyState(const EncReshape &other) { m_srcReshaped = other.m_srcReshaped; @@ -1274,6 +1175,9 @@ void EncReshape::copyState(const EncReshape &other) m_initCW = other.m_initCW; m_reshape = other.m_reshape; m_reshapePivot = other.m_reshapePivot; + m_inputPivot = other.m_inputPivot; + m_fwdScaleCoef = other.m_fwdScaleCoef; + m_invScaleCoef = other.m_invScaleCoef; m_lumaBD = other.m_lumaBD; m_reshapeLUTSize = other.m_reshapeLUTSize; } diff --git a/source/Lib/EncoderLib/EncReshape.h b/source/Lib/EncoderLib/EncReshape.h index 6e4871866a05ae33047cd22d06befe504902211c..ba9b5195058d2c216e599f37ab1c4d6ed83f90f3 100644 --- a/source/Lib/EncoderLib/EncReshape.h +++ b/source/Lib/EncoderLib/EncReshape.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -49,6 +49,20 @@ // ==================================================================================================================== // Class definition // ==================================================================================================================== +struct SeqInfo +{ + double binVar[PIC_ANALYZE_CW_BINS]; + double binHist[PIC_ANALYZE_CW_BINS]; + double normVar[PIC_ANALYZE_CW_BINS]; + int nonZeroCnt; + double weightVar; + double weightNorm; + double minBinVar; + double maxBinVar; + double meanBinVar; + double ratioStdU; + double ratioStdV; +}; class EncReshape : public Reshape { @@ -71,6 +85,9 @@ private: Pel m_cwLumaWeight[PIC_CODE_CW_BINS]; double m_chromaWeight; int m_chromaAdj; + int m_binNum; + SeqInfo m_srcSeqStats; + SeqInfo m_rspSeqStats; public: EncReshape(); @@ -81,20 +98,25 @@ public: bool getSrcReshaped() { return m_srcReshaped; } void setSrcReshaped(bool b) { m_srcReshaped = b; } - void preAnalyzerSDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT); + void initSeqStats(SeqInfo &stats); + void calcSeqStats(Picture *pcPic, SeqInfo &stats); + void preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, const SliceType sliceType, const ReshapeCW& reshapeCW); void preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT); void bubbleSortDsd(double *array, int * idx, int n); void swap(int *xp, int *yp) { int temp = *xp; *xp = *yp; *yp = temp; } void swap(double *xp, double *yp) { double temp = *xp; *xp = *yp; *yp = temp; } - void deriveReshapeParametersSDRfromStats(uint32_t *, double*, double* reshapeTH1, double* reshapeTH2, bool *intraAdp, bool *interAdp); + void cwPerturbation(int startBinIdx, int endBinIdx, uint16_t maxCW); + void cwReduction(int startBinIdx, int endBinIdx); + void deriveReshapeParametersSDR(bool *intraAdp, bool *interAdp); void deriveReshapeParameters(double 
*array, int start, int end, ReshapeCW respCW, double &alpha, double &beta); void initLUTfromdQPModel(); - void constructReshaperSDR(); + void constructReshaperLMCS(); ReshapeCW * getReshapeCW() { return &m_reshapeCW; } Pel * getWeightTable() { return m_cwLumaWeight; } double getCWeight() { return m_chromaWeight; } + void adjustLmcsPivot(); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM void copyState(const EncReshape& other); #endif };// END CLASS DEFINITION EncReshape diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp index 8e467b2eec9fe3c335f9602e6efbe696cce1ab3a..15ed5b0e24d0cf823d5a44eeb7d3cb83702b4fbe 100644 --- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp +++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -79,6 +79,8 @@ inline double xRoundIbdi(int bitDepth, double x) EncSampleAdaptiveOffset::EncSampleAdaptiveOffset() { m_CABACEstimator = NULL; + + ::memset( m_saoDisabledRate, 0, sizeof( m_saoDisabledRate ) ); } EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset() @@ -113,7 +115,6 @@ void EncSampleAdaptiveOffset::createEncData(bool isPreDBFSamplesUsed, uint32_t n } - ::memset(m_saoDisabledRate, 0, sizeof(m_saoDisabledRate)); for(int typeIdc=0; typeIdc < NUM_SAO_NEW_TYPES; typeIdc++) { @@ -210,11 +211,7 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable #if ENABLE_QPA const double lambdaChromaWeight, #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding ) -#else - const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed ) -#endif { PelUnitBuf org = cs.getOrgBuf(); PelUnitBuf res = cs.getRecoBuf(); @@ -239,11 +236,7 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable #if ENABLE_QPA lambdaChromaWeight, #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING saoEncodingRate, saoEncodingRateChroma, isGreedyMergeEncoding ); -#else - saoEncodingRate, saoEncodingRateChroma ); -#endif DTRACE_UPDATE(g_trace_ctx, (std::make_pair("poc", cs.slice->getPOC()))); DTRACE_PIC_COMP(D_REC_CB_LUMA_SAO, cs, cs.getRecoBuf(), COMPONENT_Y); @@ -253,7 +246,6 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable DTRACE ( g_trace_ctx, D_CRC, "SAO" ); DTRACE_CRC( g_trace_ctx, D_CRC, cs, cs.getRecoBuf() ); - xPCMLFDisableProcess(cs); } @@ -311,6 +303,13 @@ void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats isBelowAvail = (yPos + pcv.maxCUHeight < pcv.lumaHeight); 
isAboveRightAvail = ((yPos > 0) && (isRightAvail)); + int numHorVirBndry = 0, numVerVirBndry = 0; + int horVirBndryPos[] = { -1,-1,-1 }; + int verVirBndryPos[] = { -1,-1,-1 }; + int horVirBndryPosComp[] = { -1,-1,-1 }; + int verVirBndryPosComp[] = { -1,-1,-1 }; + bool isCtuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries(xPos, yPos, width, height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cs.picHeader ); + for(int compIdx = 0; compIdx < numberOfComponents; compIdx++) { const ComponentID compID = ComponentID(compIdx); @@ -322,10 +321,20 @@ void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats int orgStride = orgYuv.get(compID).stride; Pel* orgBlk = orgYuv.get(compID).bufAt( compArea ); + for (int i = 0; i < numHorVirBndry; i++) + { + horVirBndryPosComp[i] = (horVirBndryPos[i] >> ::getComponentScaleY(compID, area.chromaFormat)) - compArea.y; + } + for (int i = 0; i < numVerVirBndry; i++) + { + verVirBndryPosComp[i] = (verVirBndryPos[i] >> ::getComponentScaleX(compID, area.chromaFormat)) - compArea.x; + } + getBlkStats(compID, cs.sps->getBitDepth(toChannelType(compID)), blkStats[ctuRsAddr][compID] , srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height , isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail , isCalculatePreDeblockSamples + , isCtuCrossedByVirtualBoundaries, horVirBndryPosComp, verVirBndryPosComp, numHorVirBndry, numVerVirBndry ); } ctuRsAddr++; @@ -604,7 +613,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c m_CABACEstimator->resetBits(); m_CABACEstimator->sao_offset_pars( modeParam[compIdx], compIdx, sliceEnabled[compIdx], bitDepths.recon[CHANNEL_TYPE_LUMA] ); modeDist[compIdx] = 0; - minCost= m_lambda[compIdx]*(FracBitsScale*(double)m_CABACEstimator->getEstFracBits()); + minCost = m_lambda[compIdx] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits()); ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() 
); if(sliceEnabled[compIdx]) { @@ -626,7 +635,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma ); m_CABACEstimator->resetBits(); m_CABACEstimator->sao_offset_pars( testOffset[compIdx], compIdx, sliceEnabled[compIdx], bitDepths.recon[CHANNEL_TYPE_LUMA] ); - double rate = FracBitsScale*(double)m_CABACEstimator->getEstFracBits(); + double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); cost = (double)dist[compIdx] + m_lambda[compIdx]*rate; if(cost < minCost) { @@ -653,7 +662,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c modeDist [component] = 0; m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths.recon[CHANNEL_TYPE_CHROMA] ); const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits(); - cost += m_lambda[component] * FracBitsScale * double( currentFracBits - previousFracBits ); + cost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits); previousFracBits = currentFracBits; } @@ -686,7 +695,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c dist[component] = getDistortion(bitDepths.recon[CHANNEL_TYPE_CHROMA], typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][component][typeIdc]); m_CABACEstimator->sao_offset_pars( testOffset[component], component, sliceEnabled[component], bitDepths.recon[CHANNEL_TYPE_CHROMA] ); const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits(); - cost += dist[component] + (m_lambda[component] * FracBitsScale * double(currentFracBits - previousFracBits)); + cost += dist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits)); previousFracBits = currentFracBits; } @@ -712,7 +721,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk ); 
m_CABACEstimator->resetBits(); m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false ); - modeNormCost += FracBitsScale*(double)m_CABACEstimator->getEstFracBits(); + modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); } void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost ) @@ -755,7 +764,7 @@ void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int m_CABACEstimator->getCtx() = SAOCtx( ctxStart ); m_CABACEstimator->resetBits(); m_CABACEstimator->sao_block_pars( testBlkParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false ); - double rate = FracBitsScale*(double)m_CABACEstimator->getEstFracBits(); + double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); cost = normDist+rate; if(cost < modeNormCost) @@ -776,11 +785,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn #if ENABLE_QPA const double chromaWeight, #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING const double saoEncodingRate, const double saoEncodingRateChroma, const bool isGreedymergeEncoding) -#else - const double saoEncodingRate, const double saoEncodingRateChroma) -#endif { const PreCalcValues& pcv = *cs.pcv; @@ -799,7 +804,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn SAOBlkParam modeParam; double minCost, modeCost; -#if K0238_SAO_GREEDY_MERGE_ENCODING double minCost2 = 0; std::vector<SAOStatData**> groupBlkStat; if (isGreedymergeEncoding) @@ -824,13 +828,12 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn double Cost[2] = { 0, 0 }; TempCtx ctxBeforeMerge(m_CtxCache); TempCtx ctxAfterMerge(m_CtxCache); -#endif 
double totalCost = 0; // Used if bTestSAODisableAtPictureLevel==true int ctuRsAddr = 0; #if ENABLE_QPA - CHECK ((chromaWeight > 0.0) && (cs.slice->getSliceCurStartCtuTsAddr() != 0), "incompatible start CTU address, must be 0"); + CHECK ((chromaWeight > 0.0) && (cs.slice->getFirstCtuRsAddrInSlice() != 0), "incompatible start CTU address, must be 0"); #endif for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight ) @@ -850,12 +853,10 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn const TempCtx ctxStart ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) ); TempCtx ctxBest ( m_CtxCache ); -#if K0238_SAO_GREEDY_MERGE_ENCODING if (ctuRsAddr == (mergeCtuAddr - 1)) { ctxBeforeMerge = SAOCtx(m_CABACEstimator->getCtx()); } -#endif //get merge list SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES] = { NULL }; @@ -903,14 +904,10 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } } //mode -#if K0238_SAO_GREEDY_MERGE_ENCODING if (!isGreedymergeEncoding) { -#endif totalCost += minCost; -#if K0238_SAO_GREEDY_MERGE_ENCODING } -#endif m_CABACEstimator->getCtx() = SAOCtx( ctxBest ); @@ -919,7 +916,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn reconParams[ctuRsAddr] = codedParams[ctuRsAddr]; reconstructBlkSAOParam(reconParams[ctuRsAddr], mergeList); -#if K0238_SAO_GREEDY_MERGE_ENCODING if (isGreedymergeEncoding) { if (ctuRsAddr == (mergeCtuAddr - 1)) @@ -964,7 +960,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn testBlkParam[COMPONENT_Y].typeIdc = SAO_MERGE_LEFT; m_CABACEstimator->resetBits(); m_CABACEstimator->sao_block_pars(testBlkParam, cs.sps->getBitDepths(), sliceEnabled, true, false, true); - double rate = FracBitsScale * (double)m_CABACEstimator->getEstFracBits(); + double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); modeCost += rate * groupSize; if (modeCost < minCost2) { @@ -1035,11 +1031,8 @@ void 
EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } else { -#endif offsetCTU(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs); -#if K0238_SAO_GREEDY_MERGE_ENCODING } -#endif ctuRsAddr++; } //ctuRsAddr @@ -1050,7 +1043,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn if (chromaWeight > 0.0) memcpy (m_lambda, cs.slice->getLambdas(), sizeof (m_lambda)); #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING //reconstruct if (isGreedymergeEncoding) { @@ -1079,7 +1071,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn } groupBlkStat.clear(); } -#endif if (!allBlksDisabled && (totalCost >= 0) && bTestSAODisableAtPictureLevel) //SAO has not beneficial in this case - disable it { for( ctuRsAddr = 0; ctuRsAddr < pcv.sizeInCtus; ctuRsAddr++) @@ -1135,6 +1126,7 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail , bool isCalculatePreDeblockSamples + , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry ) { int x,y, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX; @@ -1172,6 +1164,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { signRight = (int8_t)sgn(srcLine[x] - srcLine[x+1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos)) + { + signLeft = -signRight; + continue; + } edgeType = signRight + signLeft; signLeft = -signRight; @@ -1194,6 +1191,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { signRight = (int8_t)sgn(srcLine[x] - srcLine[x+1]); + if 
(isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, endY + y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos)) + { + signLeft = -signRight; + continue; + } edgeType = signRight + signLeft; signLeft = -signRight; @@ -1241,6 +1243,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + signUpLine[x] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; signUpLine[x]= -signDown; @@ -1264,6 +1271,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineBelow[x]) + sgn(srcLine[x] - srcLineAbove[x]); diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1307,6 +1318,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c firstLineEndX = (!isCalculatePreDeblockSamples) ? (isAboveAvail ? 
endX : 1) : endX; for(x=firstLineStartX; x<firstLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1]; diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1323,6 +1338,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x+1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + signDownLine[x + 1] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1352,6 +1372,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x< endX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineBelow[x+1]) + sgn(srcLine[x] - srcLineAbove[x-1]); diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1395,6 +1419,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c ; for(x=firstLineStartX; x<firstLineEndX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1]; diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1411,6 +1439,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for(x=startX; x<endX; x++) { signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x-1]); + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, 
verVirBndryPos, horVirBndryPos)) + { + signUpLine[x - 1] = -signDown; + continue; + } edgeType = signDown + signUpLine[x]; diff [edgeType] += (orgLine[x] - srcLine[x]); @@ -1436,6 +1469,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c for (x=startX; x<endX; x++) { + if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos)) + { + continue; + } edgeType = sgn(srcLine[x] - srcLineBelow[x-1]) + sgn(srcLine[x] - srcLineAbove[x+1]); diff [edgeType] += (orgLine[x] - srcLine[x]); count[edgeType] ++; @@ -1503,9 +1540,8 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const { -#if HEVC_TILES_WPP + bool isLoopFiltAcrossSlicePPS = cs.pps->getLoopFilterAcrossSlicesEnabledFlag(); bool isLoopFiltAcrossTilePPS = cs.pps->getLoopFilterAcrossTilesEnabledFlag(); -#endif const int width = cs.pcv->maxCUWidth; const int height = cs.pcv->maxCUHeight; @@ -1514,20 +1550,25 @@ void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructu const CodingUnit* cuAbove = cs.getCU(pos.offset(0, -height), CH_L); const CodingUnit* cuAboveLeft = cs.getCU(pos.offset(-width, -height), CH_L); + if (!isLoopFiltAcrossSlicePPS) { - isLeftAvail = (cuLeft != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuLeft) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - isAboveAvail = (cuAbove != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuAbove) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; - isAboveLeftAvail = (cuAboveLeft != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuAboveLeft) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false; + isLeftAvail = (cuLeft == NULL) ? false : CU::isSameTile(*cuCurr, *cuLeft); + isAboveAvail = (cuAbove == NULL) ? 
false : CU::isSameTile(*cuCurr, *cuAbove); + isAboveLeftAvail = (cuAboveLeft == NULL) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft); + } + else + { + isLeftAvail = (cuLeft != NULL); + isAboveAvail = (cuAbove != NULL); + isAboveLeftAvail = (cuAboveLeft != NULL); } -#if HEVC_TILES_WPP if (!isLoopFiltAcrossTilePPS) { isLeftAvail = (!isLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuLeft); isAboveAvail = (!isAboveAvail) ? false : CU::isSameTile(*cuCurr, *cuAbove); isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft); } -#endif } //! \} diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h index 048f63db9e304a53d339d06a12201a94b219bd4f..8a0530ec791d63d711cbb65ac10b94b13e93cc42 100644 --- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h +++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -93,11 +93,7 @@ public: #if ENABLE_QPA const double lambdaChromaWeight, #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding ); -#else - const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed ); -#endif void disabledRate( CodingStructure& cs, SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma ); void getPreDBFStatistics(CodingStructure& cs); @@ -110,12 +106,10 @@ private: //methods #if ENABLE_QPA const double chromaWeight, #endif -#if K0238_SAO_GREEDY_MERGE_ENCODING const double saoEncodingRate, const double saoEncodingRateChroma, const bool isGreedymergeEncoding ); -#else - const double saoEncodingRate, const double saoEncodingRateChroma ); -#endif - void getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes, Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height, bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isCalculatePreDeblockSamples); + void getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes, Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height, bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isCalculatePreDeblockSamples + , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry + ); void deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, 
SAOBlkParam& modeParam, double& modeNormCost ); void deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost ); int64_t getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* offsetVal, SAOStatData& statData); @@ -128,7 +122,6 @@ private: //members CABACWriter* m_CABACEstimator; CtxCache* m_CtxCache; double m_lambda[MAX_NUM_COMPONENT]; - const double FracBitsScale = 1.0 / double( 1 << SCALE_BITS ); //statistics std::vector<SAOStatData**> m_statData; //[ctu][comp][classes] diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 82a08fa1206a03c52f5cfc86d14083ac13faa76d..ab194d148c76b8c169a871d9d2cf7335eb4fe844 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,10 +44,6 @@ #include "CommonLib/dtrace_blockstatistics.h" #endif -#if ENABLE_WPP_PARALLELISM -#include <mutex> -extern recursive_mutex g_cache_mutex; -#endif #include <math.h> @@ -107,6 +103,8 @@ void EncSlice::init( EncLib* pcEncLib, const SPS& sps ) void EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP) { + m_pcRdCost->resetStore(); + m_pcTrQuant->resetStore(); // store lambda m_pcRdCost ->setLambda( dLambda, slice->getSPS()->getBitDepths() ); @@ -117,19 +115,17 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP) { const ComponentID compID = ComponentID( compIdx ); int chromaQPOffset = slice->getPPS()->getQpOffset( compID ) + slice->getSliceChromaQpDelta( compID ); - int qpc = ( iQP + chromaQPOffset < 0 ) ? 
iQP : getScaledChromaQP( iQP + chromaQPOffset, m_pcCfg->getChromaFormatIdc() ); + int qpc = slice->getSPS()->getMappedChromaQpValue(compID, iQP) + chromaQPOffset; double tmpWeight = pow( 2.0, ( iQP - qpc ) / 3.0 ); // takes into account of the chroma qp mapping and chroma qp Offset - if( m_pcCfg->getDepQuantEnabledFlag() ) +#if JVET_Q0433_MODIFIED_CHROMA_DIST_WEIGHT + if( m_pcCfg->getDepQuantEnabledFlag() ) +#else + if( m_pcCfg->getDepQuantEnabledFlag() && !( m_pcCfg->getLFNST() ) ) +#endif { tmpWeight *= ( m_pcCfg->getGOPSize() >= 8 ? pow( 2.0, 0.1/3.0 ) : pow( 2.0, 0.2/3.0 ) ); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma) } m_pcRdCost->setDistortionWeight( compID, tmpWeight ); -#if ENABLE_WPP_PARALLELISM - for( int jId = 1; jId < ( m_pcLib->getNumWppThreads() + m_pcLib->getNumWppExtraLines() ); jId++ ) - { - m_pcLib->getRdCost( slice->getPic()->scheduler.getWppDataId( jId ) )->setDistortionWeight( compID, tmpWeight ); - } -#endif dLambdas[compIdx] = dLambda / tmpWeight; } @@ -199,7 +195,7 @@ static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBi } #endif -static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, const uint32_t startAddr, const uint32_t boundingAddr, +static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, Slice* const pcSlice, const int bitDepth, uint32_t &avgLumaValue) { const PreCalcValues& pcv = *pcPic->cs->pcv; @@ -208,25 +204,20 @@ static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, con const uint32_t chrHeight = pcv.maxCUHeight >> getChannelTypeScaleY (CH_C, chrFmt); const int midLevel = 1 << (bitDepth - 1); int chrValue = MAX_INT; - avgLumaValue = (startAddr < boundingAddr) ? 0 : (uint32_t)pcPic->getOrigBuf().Y().computeAvg(); + avgLumaValue = (pcSlice != nullptr) ? 
0 : (uint32_t)pcPic->getOrigBuf().Y().computeAvg(); if (ctuAddr >= 0) // luma { avgLumaValue = (uint32_t)pcPic->m_iOffsetCtu[ctuAddr]; } - else if (startAddr < boundingAddr) + else if (pcSlice != nullptr) { - for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++) + for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = pcPic->tileMap->getCtuTsToRsAddrMap (ctuTsAddr); -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif - + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); avgLumaValue += pcPic->m_iOffsetCtu[ctuRsAddr]; } - avgLumaValue = (avgLumaValue + ((boundingAddr - startAddr) >> 1)) / (boundingAddr - startAddr); + avgLumaValue = (avgLumaValue + (pcSlice->getNumCtuInSlice() >> 1)) / pcSlice->getNumCtuInSlice(); } for (uint32_t comp = COMPONENT_Cb; comp < MAX_NUM_COMPONENT; comp++) @@ -280,7 +271,7 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice, int averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP); // mean slice QP #endif - averageAdaptedLumaQP += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, 0 /*startAddr*/, 0 /*boundingAddr*/, bitDepth, meanLuma); + averageAdaptedLumaQP += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, nullptr /*pcSlice*/, bitDepth, meanLuma); if (averageAdaptedLumaQP > MAX_QP #if SHARP_LUMA_DELTA_QP @@ -301,7 +292,7 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice, savedLumaQP = averageAdaptedLumaQP; } // savedLumaQP < 0 - const int lumaChromaMappingDQP = savedLumaQP - getScaledChromaQP (savedLumaQP, pcEncCfg->getChromaFormatIdc()); + const int lumaChromaMappingDQP = savedLumaQP - pcSlice->getSPS()->getMappedChromaQpValue(compID, savedLumaQP); optSliceChromaQpOffset[comp-1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP); } @@ -334,25 +325,24 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr { double 
dQP; double dLambda; + PicHeader *picHeader = pcPic->cs->picHeader; + pcPic->cs->resetPrevPLT(pcPic->cs->prevPLT); rpcSlice = pcPic->slices[0]; rpcSlice->setSliceBits(0); rpcSlice->setPic( pcPic ); + rpcSlice->setPicHeader( picHeader ); rpcSlice->initSlice(); int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1; if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef) { - rpcSlice->setPicOutputFlag(false); + picHeader->setPicOutputFlag(false); } else { - rpcSlice->setPicOutputFlag(true); + picHeader->setPicOutputFlag(true); } rpcSlice->setPOC( pocCurr ); - rpcSlice->setDepQuantEnabledFlag( m_pcCfg->getDepQuantEnabledFlag() ); -#if HEVC_USE_SIGN_HIDING - rpcSlice->setSignDataHidingEnabledFlag( m_pcCfg->getSignDataHidingEnabledFlag() ); -#endif #if SHARP_LUMA_DELTA_QP pcPic->fieldPic = isField; @@ -420,22 +410,13 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr } } + rpcSlice->setDepth ( depth ); rpcSlice->setSliceType ( eSliceType ); // ------------------------------------------------------------------------------------------------------------------ // Non-referenced frame marking // ------------------------------------------------------------------------------------------------------------------ -#if !JVET_M0101_HLS - if(pocLast == 0) - { - rpcSlice->setTemporalLayerNonReferenceFlag(false); - } - else - { - rpcSlice->setTemporalLayerNonReferenceFlag(!m_pcCfg->getGOPEntry(iGOPid).m_refPic); - } -#endif pcPic->referenced = true; // ------------------------------------------------------------------------------------------------------------------ @@ -448,11 +429,6 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr dQP = m_pcCfg->getBaseQP(); if(eSliceType!=I_SLICE) { -#if SHARP_LUMA_DELTA_QP - if (!(( m_pcCfg->getMaxDeltaQP() == 0) && (!m_pcCfg->getLumaLevelToDeltaQPMapping().isEnabled()) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && 
(rpcSlice->getPPS()->getTransquantBypassEnabledFlag()))) -#else - if (!(( m_pcCfg->getMaxDeltaQP() == 0 ) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && (rpcSlice->getPPS()->getTransquantBypassEnabledFlag()))) -#endif { dQP += m_pcCfg->getGOPEntry(iGOPid).m_QPOffset; } @@ -490,89 +466,12 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr { // compute QP value dQP = dOrigQP + ((iDQpIdx+1)>>1)*(iDQpIdx%2 ? -1 : 1); -#if SHARP_LUMA_DELTA_QP - dLambda = calculateLambda(rpcSlice, iGOPid, depth, dQP, dQP, iQP ); -#else // compute lambda value - int NumberBFrames = ( m_pcCfg->getGOPSize() - 1 ); - int SHIFT_QP = 12; - - int bitdepth_luma_qp_scale = - 6 - * (rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 - - DISTORTION_PRECISION_ADJUSTMENT(rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); - double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP; -#if FULL_NBIT - double qp_temp_orig = (double) dQP - SHIFT_QP; -#endif - // Case #1: I or P-slices (key-frame) - double dQPFactor = m_pcCfg->getGOPEntry(iGOPid).m_QPFactor; - if ( eSliceType==I_SLICE ) - { - if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(iGOPid).m_sliceType != I_SLICE) - { - dQPFactor=m_pcCfg->getIntraQpFactor(); - } - else - { -#if X0038_LAMBDA_FROM_QP_CAPABILITY - if(m_pcCfg->getLambdaFromQPEnable()) - { - dQPFactor=0.57; - } - else - { -#endif - double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? 
NumberBFrames/2 : NumberBFrames) ); - - dQPFactor=0.57*dLambda_scale; -#if X0038_LAMBDA_FROM_QP_CAPABILITY - } -#endif - } - } -#if X0038_LAMBDA_FROM_QP_CAPABILITY - else if( m_pcCfg->getLambdaFromQPEnable() ) - { - dQPFactor=0.57; - } -#endif - - dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); - -#if X0038_LAMBDA_FROM_QP_CAPABILITY - if(!m_pcCfg->getLambdaFromQPEnable() && depth>0) -#else - if ( depth>0 ) -#endif - { -#if FULL_NBIT - dLambda *= Clip3( 2.00, 4.00, (qp_temp_orig / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 ) +#if SHARP_LUMA_DELTA_QP + dLambda = calculateLambda (rpcSlice, iGOPid, dQP, dQP, iQP); #else - dLambda *= Clip3( 2.00, 4.00, (qp_temp / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 ) -#endif - } - - // if hadamard is used in ME process - if ( !m_pcCfg->getUseHADME() && rpcSlice->getSliceType( ) != I_SLICE ) - { - dLambda *= 0.95; - } - -#if X0038_LAMBDA_FROM_QP_CAPABILITY - double lambdaModifier; - if( rpcSlice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty()) - { - lambdaModifier = m_pcCfg->getLambdaModifier( temporalId ); - } - else - { - lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ]; - } - dLambda *= lambdaModifier; -#endif - - iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) ); + dLambda = initializeLambda (rpcSlice, iGOPid, int (dQP + 0.5), dQP); + iQP = Clip3 (-rpcSlice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, int (dQP + 0.5)); #endif m_vdRdPicLambda[iDQpIdx] = dLambda; @@ -605,6 +504,7 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr const bool bUseIntraOrPeriodicOffset = (rpcSlice->isIntra() && !rpcSlice->getSPS()->getIBCFlag()) || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0); int cbQP = bUseIntraOrPeriodicOffset ? 
m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset; int crQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true) : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset; + int cbCrQP = (cbQP + crQP) >> 1; // use floor of average chroma QP offset for joint-Cb/Cr coding cbQP = Clip3( -12, 12, cbQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb); crQP = Clip3( -12, 12, crQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr); @@ -612,11 +512,17 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)>=-12), "Unspecified error"); rpcSlice->setSliceChromaQpDelta(COMPONENT_Cr, Clip3( -12, 12, crQP)); CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)>=-12), "Unspecified error"); + if (rpcSlice->getSPS()->getJointCbCrEnabledFlag()) + { + cbCrQP = Clip3(-12, 12, cbCrQP + rpcSlice->getPPS()->getQpOffset(JOINT_CbCr)) - rpcSlice->getPPS()->getQpOffset(JOINT_CbCr); + rpcSlice->setSliceChromaQpDelta(JOINT_CbCr, Clip3( -12, 12, cbCrQP )); + } } else { rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 ); rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 ); + rpcSlice->setSliceChromaQpDelta( JOINT_CbCr, 0 ); } #endif @@ -634,6 +540,9 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr dLambda *= lambdaModifier; #endif +#if RDOQ_CHROMA_LAMBDA + m_pcRdCost->setDistortionWeight (COMPONENT_Y, 1.0); // no chroma weighting for luma +#endif setUpLambda(rpcSlice, dLambda, iQP); #if WCG_EXT @@ -675,10 +584,11 @@ void 
EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr #if !W0038_CQP_ADJ rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 ); rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 ); + rpcSlice->setSliceChromaQpDelta( JOINT_CbCr, 0 ); #endif - rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag() ); - rpcSlice->setNumRefIdx(REF_PIC_LIST_0,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive); - rpcSlice->setNumRefIdx(REF_PIC_LIST_1,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive); + rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getCuChromaQpOffsetEnabledFlag() ); + rpcSlice->setNumRefIdx(REF_PIC_LIST_0, m_pcCfg->getRPLEntry(0, iGOPid).m_numRefPicsActive); + rpcSlice->setNumRefIdx(REF_PIC_LIST_1, m_pcCfg->getRPLEntry(1, iGOPid).m_numRefPicsActive); if ( m_pcCfg->getDeblockingFilterMetric() ) { @@ -713,8 +623,6 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr rpcSlice->setDeblockingFilterTcOffsetDiv2( 0 ); } - rpcSlice->setDepth ( depth ); - pcPic->layer = temporalId; if(eSliceType==I_SLICE) { @@ -722,114 +630,93 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr } rpcSlice->setTLayer( pcPic->layer ); - rpcSlice->setSliceMode ( m_pcCfg->getSliceMode() ); - rpcSlice->setSliceArgument ( m_pcCfg->getSliceArgument() ); -#if HEVC_DEPENDENT_SLICES - rpcSlice->setSliceSegmentMode ( m_pcCfg->getSliceSegmentMode() ); - rpcSlice->setSliceSegmentArgument ( m_pcCfg->getSliceSegmentArgument() ); -#endif - rpcSlice->setMaxNumMergeCand ( m_pcCfg->getMaxNumMergeCand() ); - rpcSlice->setMaxNumAffineMergeCand( m_pcCfg->getMaxNumAffineMergeCand() ); - rpcSlice->setSplitConsOverrideFlag(false); - rpcSlice->setMinQTSize( rpcSlice->getSPS()->getMinQTSize(eSliceType)); - rpcSlice->setMaxBTDepth( rpcSlice->isIntra() ? rpcSlice->getSPS()->getMaxBTDepthI() : rpcSlice->getSPS()->getMaxBTDepth() ); - rpcSlice->setMaxBTSize( rpcSlice->isIntra() ? 
rpcSlice->getSPS()->getMaxBTSizeI() : rpcSlice->getSPS()->getMaxBTSize() ); - rpcSlice->setMaxTTSize( rpcSlice->isIntra() ? rpcSlice->getSPS()->getMaxTTSizeI() : rpcSlice->getSPS()->getMaxTTSize() ); - if ( eSliceType == I_SLICE && rpcSlice->getSPS()->getUseDualITree() ) + rpcSlice->setDisableSATDForRD(false); + + if( ( m_pcCfg->getIBCHashSearch() && m_pcCfg->getIBCMode() ) || m_pcCfg->getAllowDisFracMMVD() ) { - rpcSlice->setMinQTSizeIChroma( rpcSlice->getSPS()->getMinQTSize(eSliceType, CHANNEL_TYPE_CHROMA) ); - rpcSlice->setMaxBTDepthIChroma( rpcSlice->getSPS()->getMaxBTDepthIChroma() ); - rpcSlice->setMaxBTSizeIChroma( rpcSlice->getSPS()->getMaxBTSizeIChroma() ); - rpcSlice->setMaxTTSizeIChroma( rpcSlice->getSPS()->getMaxTTSizeIChroma() ); + m_pcCuEncoder->getIbcHashMap().destroy(); + m_pcCuEncoder->getIbcHashMap().init( pcPic->cs->pps->getPicWidthInLumaSamples(), pcPic->cs->pps->getPicHeightInLumaSamples() ); } } - -#if SHARP_LUMA_DELTA_QP -double EncSlice::calculateLambda( const Slice* slice, - const int GOPid, // entry in the GOP table - const int depth, // slice GOP hierarchical depth. - const double refQP, // initial slice-level QP - const double dQP, // initial double-precision QP - int &iQP ) // returned integer QP. 
+double EncSlice::initializeLambda(const Slice* slice, const int GOPid, const int refQP, const double dQP) { - enum SliceType eSliceType = slice->getSliceType(); - const bool isField = slice->getPic()->fieldPic; - const int NumberBFrames = ( m_pcCfg->getGOPSize() - 1 ); - const int SHIFT_QP = 12; + const int bitDepthLuma = slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); + const int bitDepthShift = 6 * (bitDepthLuma - 8 - DISTORTION_PRECISION_ADJUSTMENT(bitDepthLuma)) - 12; + const int numberBFrames = m_pcCfg->getGOPSize() - 1; + const SliceType sliceType = slice->getSliceType(); #if X0038_LAMBDA_FROM_QP_CAPABILITY - const int temporalId=m_pcCfg->getGOPEntry(GOPid).m_temporalId; - const std::vector<double> &intraLambdaModifiers=m_pcCfg->getIntraLambdaModifier(); + const int temporalId = m_pcCfg->getGOPEntry(GOPid).m_temporalId; + const std::vector<double> &intraLambdaModifiers = m_pcCfg->getIntraLambdaModifier(); #endif - - int bitdepth_luma_qp_scale = 6 - * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8 - - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA))); - double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP; - // Case #1: I or P-slices (key-frame) + // case #1: I or P slices (key-frame) double dQPFactor = m_pcCfg->getGOPEntry(GOPid).m_QPFactor; - if ( eSliceType==I_SLICE ) + double dLambda, lambdaModifier; + + if (sliceType == I_SLICE) { - if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(GOPid).m_sliceType != I_SLICE) + if ((m_pcCfg->getIntraQpFactor() >= 0.0) && (m_pcCfg->getGOPEntry(GOPid).m_sliceType != I_SLICE)) { - dQPFactor=m_pcCfg->getIntraQpFactor(); + dQPFactor = m_pcCfg->getIntraQpFactor(); } else { #if X0038_LAMBDA_FROM_QP_CAPABILITY - if(m_pcCfg->getLambdaFromQPEnable()) + if (m_pcCfg->getLambdaFromQPEnable()) { - dQPFactor=0.57; + dQPFactor = 0.57; } else - { -#endif - double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? 
NumberBFrames/2 : NumberBFrames) ); - dQPFactor=0.57*dLambda_scale; -#if X0038_LAMBDA_FROM_QP_CAPABILITY - } #endif + dQPFactor = 0.57 * (1.0 - Clip3(0.0, 0.5, 0.05 * double (slice->getPic()->fieldPic ? numberBFrames >> 1 : numberBFrames))); } } #if X0038_LAMBDA_FROM_QP_CAPABILITY - else if( m_pcCfg->getLambdaFromQPEnable() ) + else if (m_pcCfg->getLambdaFromQPEnable()) { - dQPFactor=0.57; + dQPFactor = 0.57; } #endif - double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); + dLambda = dQPFactor * pow(2.0, (dQP + bitDepthShift) / 3.0); #if X0038_LAMBDA_FROM_QP_CAPABILITY - if( !(m_pcCfg->getLambdaFromQPEnable()) && depth>0 ) + if (slice->getDepth() > 0 && !m_pcCfg->getLambdaFromQPEnable()) #else - if ( depth>0 ) + if (slice->getDepth() > 0) #endif { - double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP; - dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0)); // (j == B_SLICE && p_cur_frm->layer != 0 ) + dLambda *= Clip3(2.0, 4.0, ((refQP + bitDepthShift) / 6.0)); } - - // if hadamard is used in ME process - if ( !m_pcCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE ) + // if Hadamard is used in motion estimation process + if (!m_pcCfg->getUseHADME() && (sliceType != I_SLICE)) { dLambda *= 0.95; } - #if X0038_LAMBDA_FROM_QP_CAPABILITY - double lambdaModifier; - if( eSliceType != I_SLICE || intraLambdaModifiers.empty()) + if ((sliceType != I_SLICE) || intraLambdaModifiers.empty()) { - lambdaModifier = m_pcCfg->getLambdaModifier( temporalId ); + lambdaModifier = m_pcCfg->getLambdaModifier(temporalId); } else { - lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ]; + lambdaModifier = intraLambdaModifiers[temporalId < intraLambdaModifiers.size() ? 
temporalId : intraLambdaModifiers.size() - 1]; } dLambda *= lambdaModifier; #endif - iQP = Clip3( -slice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) ); + return dLambda; +} + +#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU +double EncSlice::calculateLambda( const Slice* slice, + const int GOPid, // entry in the GOP table + const double refQP, // initial slice-level QP + const double dQP, // initial double-precision QP + int &iQP ) // returned integer QP. +{ + double dLambda = initializeLambda (slice, GOPid, int (refQP + 0.5), dQP); + iQP = Clip3 (-slice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, int (dQP + 0.5)); if( m_pcCfg->getDepQuantEnabledFlag() ) { @@ -847,19 +734,22 @@ void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda ) // store lambda slice->setSliceQp( sliceQP ); +#if RDOQ_CHROMA_LAMBDA + m_pcRdCost->setDistortionWeight (COMPONENT_Y, 1.0); // no chroma weighting for luma +#endif setUpLambda(slice, lambda, sliceQP); +#if WCG_EXT + m_pcRdCost->saveUnadjustedLambda(); +#endif } #if ENABLE_QPA static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, const PreCalcValues& pcv, - const uint32_t startAddr, const uint32_t boundingAddr, const bool useSharpLumaDQP, + const bool useSharpLumaDQP, const bool useFrameWiseQPA, const int previouslyAdaptedLumaQP = -1) { const int bitDepth = pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA); const int iQPIndex = pcSlice->getSliceQp(); // initial QP index for current slice, used in following loops -#if HEVC_TILES_WPP - const TileMap& tileMap = *pcPic->tileMap; -#endif bool sliceQPModified = false; uint32_t meanLuma = MAX_UINT; double hpEnerAvg = 0.0; @@ -868,13 +758,9 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, if (!useFrameWiseQPA || previouslyAdaptedLumaQP < 0) // mean visual activity value and luma value in each CTU #endif { - for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++) + for 
(uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight); const CompArea ctuArea = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y()); const CompArea fltArea = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x > 0 ? pos.x - 1 : 0, pos.y > 0 ? pos.y - 1 : 0, pcv.maxCUWidth + (pos.x > 0 ? 2 : 1), pcv.maxCUHeight + (pos.y > 0 ? 2 : 1))), pcPic->Y()); @@ -888,7 +774,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, pcPic->m_iOffsetCtu[ctuRsAddr] = pcPic->getOrigBuf (ctuArea).computeAvg(); } - hpEnerAvg /= double (boundingAddr - startAddr); + hpEnerAvg /= double (pcSlice->getNumCtuInSlice()); } #if GLOBAL_AVERAGING const double hpEnerPic = 1.0 / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), bitDepth); // inverse, speed @@ -902,7 +788,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, if (isChromaEnabled (pcPic->chromaFormat) && (iQPIndex < MAX_QP) && (previouslyAdaptedLumaQP < 0)) { - iQPFixed += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, startAddr, boundingAddr, bitDepth, meanLuma); + iQPFixed += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, pcSlice, bitDepth, meanLuma); if (iQPFixed > MAX_QP #if SHARP_LUMA_DELTA_QP @@ -919,17 +805,13 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, { meanLuma = 0; - for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++) + for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++) { - #if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); - #else - 
const uint32_t ctuRsAddr = ctuTsAddr; - #endif + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); meanLuma += pcPic->m_iOffsetCtu[ctuRsAddr]; // CTU mean } - meanLuma = (meanLuma + ((boundingAddr - startAddr) >> 1)) / (boundingAddr - startAddr); + meanLuma = (meanLuma + (pcSlice->getNumCtuInSlice() >> 1)) / pcSlice->getNumCtuInSlice(); } iQPFixed = Clip3 (0, MAX_QP, iQPFixed + lumaDQPOffset (meanLuma, bitDepth)); } @@ -951,26 +833,18 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, sliceQPModified = true; } - for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++) + for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPFixed; // fixed QPs } } else // CTU-wise QPA { - for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++) + for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic)); @@ -980,7 +854,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, if (isChromaEnabled (pcPic->chromaFormat)) { - iQPAdapt += getGlaringColorQPOffset (pcPic, (int)ctuRsAddr, startAddr, boundingAddr, bitDepth, meanLuma); + iQPAdapt += getGlaringColorQPOffset (pcPic, (int)ctuRsAddr, nullptr, bitDepth, meanLuma); if (iQPAdapt > MAX_QP #if SHARP_LUMA_DELTA_QP @@ -1001,7 +875,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, } #endif - const uint32_t uRefScale = 
g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + bitDepth - 4); + const uint32_t uRefScale = g_invQuantScales[0][iQPAdapt % 6] << ((iQPAdapt / 6) + bitDepth - 4); const CompArea subArea = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y()); const Pel* pSrc = pcPic->getOrigBuf (subArea).buf; const SizeType iSrcStride = pcPic->getOrigBuf (subArea).stride; @@ -1041,7 +915,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; // adapted QPs #if ENABLE_QPA_SUB_CTU - if (pcv.widthInCtus > 1 && pcSlice->getPPS()->getCuQpDeltaSubdiv() == 0) // reduce local DQP rate peaks + if (pcv.widthInCtus > 1 && pcSlice->getCuQpDeltaSubdiv() == 0) // reduce local DQP rate peaks #elif ENABLE_QPA_SUB_CTU if (pcv.widthInCtus > 1 && pcSlice->getPPS()->getMaxCuDQPDepth() == 0) // reduce local DQP rate peaks #else @@ -1065,7 +939,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, { pcPic->m_iOffsetCtu[ctuRsAddr - 1] = (Pel)iQPAdapt; } - if ((ctuTsAddr == boundingAddr - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice + if ((ctuIdx == pcSlice->getNumCtuInSlice() - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice { iQPAdapt = std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]); if (pcPic->m_iOffsetCtu[ctuRsAddr] < (Pel)iQPAdapt) @@ -1088,7 +962,7 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea, const int bitDepth = cs.slice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA); // overall image bit-depth const int adaptedCtuQP = pcPic ? 
pcPic->m_iOffsetCtu[ctuAddr] : cs.slice->getSliceQpBase(); - if (!pcPic || cs.pps->getCuQpDeltaSubdiv() == 0) return adaptedCtuQP; + if (!pcPic || cs.slice->getCuQpDeltaSubdiv() == 0) return adaptedCtuQP; for (unsigned addr = 0; addr < cs.picture->m_subCtuQP.size(); addr++) { @@ -1099,12 +973,8 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea, #if SHARP_LUMA_DELTA_QP const int lumaCtuDQP = useSharpLumaDQP ? lumaDQPOffset ((uint32_t)pcPic->m_uEnerHpCtu[ctuAddr], bitDepth) : 0; #endif -#if MAX_TB_SIZE_SIGNALLING const unsigned mts = std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth); -#else - const unsigned mts = std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth); -#endif - const unsigned mtsLog2 = (unsigned)g_aucLog2[mts]; + const unsigned mtsLog2 = (unsigned)floorLog2(mts); const unsigned stride = pcv.maxCUWidth >> mtsLog2; unsigned numAct = 0; // number of block activities double sumAct = 0.0; // sum of all block activities @@ -1198,12 +1068,6 @@ void EncSlice::setSearchRange( Slice* pcSlice ) iRefPOC = pcSlice->getRefPic(e, iRefIdx)->getPOC(); int newSearchRange = Clip3(m_pcCfg->getMinSearchWindow(), iMaxSR, (iMaxSR*ADAPT_SR_SCALE*abs(iCurrPOC - iRefPOC)+iOffset)/iGOPSize); m_pcInterSearch->setAdaptiveSearchRange(iDir, iRefIdx, newSearchRange); -#if ENABLE_WPP_PARALLELISM - for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ ) - { - m_pcLib->getInterSearch( jId )->setAdaptiveSearchRange( iDir, iRefIdx, newSearchRange ); - } -#endif } } } @@ -1228,20 +1092,7 @@ void EncSlice::precompressSlice( Picture* pcPic ) Slice* pcSlice = pcPic->slices[getSliceSegmentIdx()]; -#if HEVC_DEPENDENT_SLICES - if (pcSlice->getDependentSliceSegmentFlag()) - { - // if this is a dependent slice segment, then it was optimised - // when analysing the entire slice. - return; - } -#endif - if (pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES) - { - // TODO: investigate use of average cost per CTU so that this Slice Mode can be used. 
- THROW( "Unable to optimise Slice-level QP if Slice Mode is set to FIXED_NUMBER_OF_BYTES\n" ); - } double dPicRdCostBest = MAX_DOUBLE; uint32_t uiQpIdxBest = 0; @@ -1298,31 +1149,15 @@ void EncSlice::calCostSliceI(Picture* pcPic) // TODO: this only analyses the fir { double iSumHadSlice = 0; Slice * const pcSlice = pcPic->slices[getSliceSegmentIdx()]; -#if HEVC_TILES_WPP - const TileMap &tileMap = *pcPic->tileMap; -#endif const PreCalcValues& pcv = *pcPic->cs->pcv; const SPS &sps = *(pcSlice->getSPS()); const int shift = sps.getBitDepth(CHANNEL_TYPE_LUMA)-8; const int offset = (shift>0)?(1<<(shift-1)):0; -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentBits(0); -#endif - - uint32_t startCtuTsAddr, boundingCtuTsAddr; - xDetermineStartAndBoundingCtuTsAddr ( startCtuTsAddr, boundingCtuTsAddr, pcPic ); -#if HEVC_TILES_WPP - for( uint32_t ctuTsAddr = startCtuTsAddr, ctuRsAddr = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr); - ctuTsAddr < boundingCtuTsAddr; - ctuRsAddr = tileMap.getCtuTsToRsAddrMap(++ctuTsAddr) ) -#else - for( uint32_t ctuTsAddr = startCtuTsAddr, ctuRsAddr = startCtuTsAddr; - ctuTsAddr < boundingCtuTsAddr; - ctuRsAddr = ++ctuTsAddr ) -#endif + for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ ) { + uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); Position pos( (ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight); const int height = std::min( pcv.maxCUHeight, pcv.lumaHeight - pos.y ); @@ -1345,20 +1180,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c // effectively disabling the slice-segment-mode. 
Slice* const pcSlice = pcPic->slices[getSliceSegmentIdx()]; - uint32_t startCtuTsAddr; - uint32_t boundingCtuTsAddr; - -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentBits(0); -#endif - xDetermineStartAndBoundingCtuTsAddr ( startCtuTsAddr, boundingCtuTsAddr, pcPic ); - if (bCompressEntireSlice) - { - boundingCtuTsAddr = pcSlice->getSliceCurEndCtuTsAddr(); -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentCurEndCtuTsAddr(boundingCtuTsAddr); -#endif - } // initialize cost values - these are used by precompressSlice (they should be parameters). m_uiPicTotalBits = 0; @@ -1368,7 +1189,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c m_CABACEstimator->initCtxModels( *pcSlice ); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ ) { CABACWriter* cw = m_pcLib->getCABACEncoder( jId )->getCABACEstimator( pcSlice->getSPS() ); @@ -1392,17 +1213,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c if ( bWp_explicit ) { - //------------------------------------------------------------------------------ - // Weighted Prediction implemented at Slice level. SliceMode=2 is not supported yet. 
- //------------------------------------------------------------------------------ -#if HEVC_DEPENDENT_SLICES - if ( pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES || pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES ) -#else - if(pcSlice->getSliceMode() == FIXED_NUMBER_OF_BYTES) -#endif - { - EXIT("Weighted Prediction is not yet supported with slice mode determined by max number of bins."); - } xEstimateWPParamSlice( pcSlice, m_pcCfg->getWeightedPredictionMethod() ); pcSlice->initWpScaling(pcSlice->getSPS()); @@ -1412,45 +1222,8 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c } -#if HEVC_DEPENDENT_SLICES -#if HEVC_TILES_WPP - // Adjust initial state if this is the start of a dependent slice. - { - const TileMap& tileMap = *pcPic->tileMap; - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr); - const uint32_t currentTileIdx = tileMap.getTileIdxMap(ctuRsAddr); - const Tile& currentTile = tileMap.tiles[currentTileIdx]; - const uint32_t firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != firstCtuRsAddrOfTile ) - { - // This will only occur if dependent slice-segments (m_entropyCodingSyncContextState=true) are being used. 
- if( currentTile.getTileWidthInCtus() >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() ) - { - m_CABACEstimator->getCtx() = m_lastSliceSegmentEndContextState; - m_CABACEstimator->start(); - } - } - } -#else - // KJS: not sure if this works (but both dep slices and tiles shall be removed in VTM, so this code should not be used) - if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != startCtuTsAddr ) - { - if( pcPic->cs->pcv->widthInCtus >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() ) - { - m_CABACEstimator->getCtx() = m_lastSliceSegmentEndContextState; - m_CABACEstimator->start(); - } -#endif -#endif -#if HEVC_DEPENDENT_SLICES - if( !pcSlice->getDependentSliceSegmentFlag() ) - { -#endif pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - } -#endif CHECK( pcPic->m_prevQP[0] == std::numeric_limits<int>::max(), "Invalid previous QP" ); @@ -1459,34 +1232,27 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c cs.pcv = pcSlice->getPPS()->pcv; cs.fracBits = 0; - if( startCtuTsAddr == 0 && ( pcSlice->getPOC() != m_pcCfg->getSwitchPOC() || -1 == m_pcCfg->getDebugCTU() ) ) + if( pcSlice->getFirstCtuRsAddrInSlice() == 0 && ( pcSlice->getPOC() != m_pcCfg->getSwitchPOC() || -1 == m_pcCfg->getDebugCTU() ) ) { - cs.initStructData (pcSlice->getSliceQp(), pcSlice->getPPS()->getTransquantBypassEnabledFlag()); + cs.initStructData (pcSlice->getSliceQp()); } #if ENABLE_QPA - if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr)) + if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl()) { - if (applyQPAdaptation (pcPic, pcSlice, *cs.pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES, + if (applyQPAdaptation (pcPic, pcSlice, *cs.pcv, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES, (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && 
m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP)) { m_CABACEstimator->initCtxModels (*pcSlice); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++) { CABACWriter* cw = m_pcLib->getCABACEncoder (jId)->getCABACEstimator (pcSlice->getSPS()); cw->initCtxModels (*pcSlice); } -#endif -#if HEVC_DEPENDENT_SLICES - if (!pcSlice->getDependentSliceSegmentFlag()) - { #endif pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - } -#endif - if (startCtuTsAddr == 0) + if (pcSlice->getFirstCtuRsAddrInSlice() == 0) { cs.currQP[0] = cs.currQP[1] = pcSlice->getSliceQp(); // cf code above } @@ -1494,46 +1260,27 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c } #endif // ENABLE_QPA -#if ENABLE_WPP_PARALLELISM - bool bUseThreads = m_pcCfg->getNumWppThreads() > 1; - if( bUseThreads ) + bool checkPLTRatio = m_pcCfg->getIntraPeriod() != 1 && pcSlice->isIRAP(); + if (checkPLTRatio) { - CHECK( startCtuTsAddr != 0 || boundingCtuTsAddr != pcPic->cs->pcv->sizeInCtus, "not intended" ); - - pcPic->cs->allocateVectorsAtPicLevel(); - - omp_set_num_threads( m_pcCfg->getNumWppThreads() + m_pcCfg->getNumWppExtraLines() ); - - #pragma omp parallel for schedule(static,1) if(bUseThreads) - for( int ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr += widthInCtus ) - { - // wpp thread start - pcPic->scheduler.setWppThreadId(); -#if ENABLE_SPLIT_PARALLELISM - pcPic->scheduler.setSplitThreadId( 0 ); -#endif - encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, ctuTsAddr, ctuTsAddr + widthInCtus, m_pcLib ); - // wpp thread stop - } + m_pcCuEncoder->getModeCtrl()->setPltEnc(true); } else -#endif + { + bool doPlt = m_pcLib->getPltEnc(); + m_pcCuEncoder->getModeCtrl()->setPltEnc(doPlt); + } + #if K0149_BLOCK_STATISTICS const SPS *sps = pcSlice->getSPS(); CHECK(sps == 0, "No SPS present"); 
writeBlockStatisticsHeader(sps); #endif m_pcInterSearch->resetAffineMVList(); - encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib ); - -#if HEVC_DEPENDENT_SLICES - // store context state at the end of this slice-segment, in case the next slice is a dependent slice and continues using the CABAC contexts. - if( pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag() ) - { - m_lastSliceSegmentEndContextState = m_CABACEstimator->getCtx();//ctx end of dep.slice - } -#endif - + m_pcInterSearch->resetUniMvList(); + ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled)); + encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, m_pcLib ); + if (checkPLTRatio) m_pcLib->checkPltStats( pcPic ); } void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr ) @@ -1542,25 +1289,18 @@ void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32 Slice* pcSlice = cs.slice; const PreCalcValues& pcv = *cs.pcv; const uint32_t widthInCtus = pcv.widthInCtus; -#if HEVC_TILES_WPP - const TileMap& tileMap = *pcPic->tileMap; -#endif const uint32_t hashThreshold = 20; uint32_t totalCtu = 0; uint32_t hashRatio = 0; - if ( !pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ) + if ( !pcSlice->getSPS()->getFpelMmvdEnabledFlag() ) { return; } - for ( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ ) + for ( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ ) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr ); -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif + const uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus; const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus; @@ -1573,32 +1313,64 @@ void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32 if ( hashRatio > totalCtu * hashThreshold ) { - 
pcSlice->setDisFracMMVD( true ); + pcPic->cs->picHeader->setDisFracMMVD( true ); } - if (!pcSlice->getDisFracMMVD()) { + if (!pcPic->cs->picHeader->getDisFracMMVD()) { bool useIntegerMVD = (pcPic->lwidth()*pcPic->lheight() > 1920 * 1080); - pcSlice->setDisFracMMVD( useIntegerMVD ); + pcPic->cs->picHeader->setDisFracMMVD( useIntegerMVD ); } } -void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pEncLib ) + +void EncSlice::setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma ) +{ + bool sgnFlag = true; + + if( isChromaEnabled( cs.picture->chromaFormat) ) + { + const CompArea cbArea = CompArea( COMPONENT_Cb, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); + const CompArea crArea = CompArea( COMPONENT_Cr, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); + const CPelBuf orgCb = cs.picture->getOrigBuf( cbArea ); + const CPelBuf orgCr = cs.picture->getOrigBuf( crArea ); + const int x0 = ( cbArea.x > 0 ? 0 : 1 ); + const int y0 = ( cbArea.y > 0 ? 0 : 1 ); + const int x1 = ( cbArea.x + cbArea.width < cs.picture->Cb().width ? cbArea.width : cbArea.width - 1 ); + const int y1 = ( cbArea.y + cbArea.height < cs.picture->Cb().height ? 
cbArea.height : cbArea.height - 1 ); + const int cbs = orgCb.stride; + const int crs = orgCr.stride; + const Pel* pCb = orgCb.buf + y0 * cbs; + const Pel* pCr = orgCr.buf + y0 * crs; + int64_t sumCbCr = 0; + + // determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes + for( int y = y0; y < y1; y++, pCb += cbs, pCr += crs ) + { + for( int x = x0; x < x1; x++ ) + { + int cb = ( 12*(int)pCb[x] - 2*((int)pCb[x-1] + (int)pCb[x+1] + (int)pCb[x-cbs] + (int)pCb[x+cbs]) - ((int)pCb[x-1-cbs] + (int)pCb[x+1-cbs] + (int)pCb[x-1+cbs] + (int)pCb[x+1+cbs]) ); + int cr = ( 12*(int)pCr[x] - 2*((int)pCr[x-1] + (int)pCr[x+1] + (int)pCr[x-crs] + (int)pCr[x+crs]) - ((int)pCr[x-1-crs] + (int)pCr[x+1-crs] + (int)pCr[x-1+crs] + (int)pCr[x+1+crs]) ); + sumCbCr += cb*cr; + } + } + + sgnFlag = ( sumCbCr < 0 ); + } + + cs.picHeader->setJointCbCrSignFlag( sgnFlag ); +} + + +void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, EncLib* pEncLib ) { - //PROF_ACCUM_AND_START_NEW_SET( getProfilerCTU( pcPic, 0, 0 ), P_PIC_LEVEL ); - //PROF_START( getProfilerCTU( cs.slice->isIntra(), pcPic->scheduler.getWppThreadId() ), P_PIC_LEVEL, toWSizeIdx( cs.pcv->maxCUWidth ), toHSizeIdx( cs.pcv->maxCUHeight ) ); CodingStructure& cs = *pcPic->cs; Slice* pcSlice = cs.slice; const PreCalcValues& pcv = *cs.pcv; const uint32_t widthInCtus = pcv.widthInCtus; -#if HEVC_TILES_WPP - const TileMap& tileMap = *pcPic->tileMap; -#endif #if ENABLE_QPA const int iQPIndex = pcSlice->getSliceQpBase(); #endif -#if ENABLE_WPP_PARALLELISM - const int dataId = pcPic->scheduler.getWppDataId(); -#elif ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM const int dataId = 0; #endif CABACWriter* pCABACWriter = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() ); @@ -1606,54 +1378,42 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons RdCost* pRdCost = 
pEncLib->getRdCost( PARL_PARAM0( dataId ) ); EncCfg* pCfg = pEncLib; RateCtrl* pRateCtrl = pEncLib->getRateCtrl(); -#if ENABLE_WPP_PARALLELISM - // first version dont use ctx from above - pCABACWriter->initCtxModels( *pcSlice ); -#endif #if RDOQ_CHROMA_LAMBDA pTrQuant ->setLambdas( pcSlice->getLambdas() ); #else pTrQuant ->setLambda ( pcSlice->getLambdas()[0] ); #endif pRdCost ->setLambda ( pcSlice->getLambdas()[0], pcSlice->getSPS()->getBitDepths() ); +#if WCG_EXT && ER_CHROMA_QP_WCG_PPS && ENABLE_QPA + if (!pCfg->getWCGChromaQPControl().isEnabled() && pCfg->getUsePerceptQPA() && !pCfg->getUseRateCtrl()) + { + pRdCost->saveUnadjustedLambda(); + } +#endif int prevQP[2]; int currQP[2]; prevQP[0] = prevQP[1] = pcSlice->getSliceQp(); currQP[0] = currQP[1] = pcSlice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - if( !pcSlice->getDependentSliceSegmentFlag() ) - { -#endif prevQP[0] = prevQP[1] = pcSlice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - } -#endif - if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() || + if ( pcSlice->getSPS()->getFpelMmvdEnabledFlag() || (pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch())) { - if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag()) - cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcLib->getReshaper()->getFwdLUT()); - m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap( cs.picture->getOrigBuf() ); - if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag()) - cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf()); + m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap(cs.picture->getTrueOrigBuf()); + if (m_pcCfg->getIntraPeriod() != -1) + { + int hashBlkHitPerc = m_pcCuEncoder->getIbcHashMap().calHashBlkMatchPerc(cs.area.Y()); + cs.slice->setDisableSATDForRD(hashBlkHitPerc > 59); + } } - checkDisFracMmvd( pcPic, startCtuTsAddr, boundingCtuTsAddr ); - // for every CTU in the slice 
segment (may terminate sooner if there is a byte limit on the slice-segment) - for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ ) + + // for every CTU in the slice + for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ ) { -#if HEVC_TILES_WPP - const int32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr ); -#else - const int32_t ctuRsAddr = ctuTsAddr; -#endif + const int32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); -#if HEVC_TILES_WPP // update CABAC state - const uint32_t firstCtuRsAddrOfTile = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)].getFirstCtuRsAddr(); - const uint32_t tileXPosInCtus = firstCtuRsAddrOfTile % widthInCtus; -#endif const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus; const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus; @@ -1662,44 +1422,32 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) ); if( pCfg->getSwitchPOC() != pcPic->poc || -1 == pCfg->getDebugCTU() ) - if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == 0) + if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && cs.pps->ctuIsTileColBd( ctuXPosInCtus )) { cs.motionLut.lut.resize(0); cs.motionLut.lutIbc.resize(0); - cs.motionLut.lutShare.resize(0); - cs.motionLut.lutShareIbc.resize(0); } -#if ENABLE_WPP_PARALLELISM - pcPic->scheduler.wait( ctuXPosInCtus, ctuYPosInCtus ); -#endif -#if HEVC_TILES_WPP - if (ctuRsAddr == firstCtuRsAddrOfTile) + if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && cs.pps->ctuIsTileRowBd( ctuYPosInCtus )) { pCABACWriter->initCtxModels( *pcSlice ); + cs.resetPrevPLT(cs.prevPLT); prevQP[0] = prevQP[1] = pcSlice->getSliceQp(); } - else if (ctuXPosInCtus == tileXPosInCtus && pEncLib->getEntropyCodingSyncEnabledFlag()) + else if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag()) { - // reset and then update 
contexts to the state at the end of the top-right CTU (if within current slice and tile). + // reset and then update contexts to the state at the end of the top CTU (if within current slice and tile). pCABACWriter->initCtxModels( *pcSlice ); - if( cs.getCURestricted( pos.offset(pcv.maxCUWidth, -1), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) ) + cs.resetPrevPLT(cs.prevPLT); + if( cs.getCURestricted( pos.offset(0, -1), pos, pcSlice->getIndependentSliceIdx(), cs.pps->getTileIdx( pos ), CH_L ) ) { - // Top-right is available, we use it. + // Top is available, we use it. pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextState; } prevQP[0] = prevQP[1] = pcSlice->getSliceQp(); } -#endif -#if ENABLE_WPP_PARALLELISM - if( ctuXPosInCtus == 0 && ctuYPosInCtus > 0 && widthInCtus > 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) ) - { - pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus-1]; // last line - } -#else -#endif #if RDOQ_CHROMA_LAMBDA && ENABLE_QPA && !ENABLE_QPA_SUB_CTU double oldLambdaArray[MAX_NUM_COMPONENT] = {0.0}; @@ -1731,11 +1479,14 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons estQP = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, estQP ); pRdCost->setLambda(estLambda, pcSlice->getSPS()->getBitDepths()); +#if WCG_EXT + pRdCost->saveUnadjustedLambda(); +#endif #if RDOQ_CHROMA_LAMBDA - // set lambda for RDOQ - const double chromaLambda = estLambda / pRdCost->getChromaWeight(); - const double lambdaArray[MAX_NUM_COMPONENT] = { estLambda, chromaLambda, chromaLambda }; + const double lambdaArray[MAX_NUM_COMPONENT] = {estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y), + estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb), + estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)}; pTrQuant->setLambdas( lambdaArray ); #else pTrQuant->setLambda( estLambda ); @@ -1757,8 +1508,9 @@ void 
EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons #if !ENABLE_QPA_SUB_CTU #if RDOQ_CHROMA_LAMBDA pTrQuant->getLambdas (oldLambdaArray); // save the old lambdas - const double chromaLambda = newLambda / pRdCost->getChromaWeight(); - const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda}; + const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y), + newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb), + newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)}; pTrQuant->setLambdas (lambdaArray); #else pTrQuant->setLambda (newLambda); @@ -1769,17 +1521,17 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons } #endif - bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr; - if( updateGbiCodingOrder ) + bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0; + if( updateBcwCodingOrder ) { - resetGbiCodingOrder(false, cs); + resetBcwCodingOrder(false, cs); m_pcInterSearch->initWeightIdxBits(); } - if (pcSlice->getSPS()->getUseReshaper()) + if (pcSlice->getSPS()->getUseLmcs()) { m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc()); -#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++) { m_pcLib->getCuEncoder(jId)->setDecCuReshaperInEncCU(m_pcLib->getReshaper(jId), pcSlice->getSPS()->getChromaFormatIdc()); @@ -1792,79 +1544,34 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons } if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU()) -#if ENABLE_WPP_PARALLELISM - pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP ); -#else m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP ); -#endif #if K0149_BLOCK_STATISTICS 
getAndStoreBlockStatistics(cs, ctuArea); #endif pCABACWriter->resetBits(); - pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true ); + pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true, true ); const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS ); - // Calculate if this CTU puts us over slice bit size. - // cannot terminate if current slice/slice-segment would be 0 Ctu in size, - const uint32_t validEndOfSliceCtuTsAddr = ctuTsAddr + (ctuTsAddr == startCtuTsAddr ? 1 : 0); - // Set slice end parameter - if(pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceBits()+numberOfWrittenBits > (pcSlice->getSliceArgument()<<3)) - { -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr); -#endif - pcSlice->setSliceCurEndCtuTsAddr(validEndOfSliceCtuTsAddr); - boundingCtuTsAddr=validEndOfSliceCtuTsAddr; - } -#if HEVC_DEPENDENT_SLICES - else if((!bCompressEntireSlice) && pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3)) - { - pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr); - boundingCtuTsAddr=validEndOfSliceCtuTsAddr; - } -#endif - if (boundingCtuTsAddr <= ctuTsAddr) - { - break; - } - -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM #pragma omp critical #endif pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) ); -#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM +#if ENABLE_SPLIT_PARALLELISM #pragma omp critical #endif -#if HEVC_DEPENDENT_SLICES - pcSlice->setSliceSegmentBits( pcSlice->getSliceSegmentBits() + numberOfWrittenBits ); -#endif -#if HEVC_TILES_WPP - // Store probabilities of second CTU in line into buffer - used only if wavefront-parallel-processing is enabled. 
- if( ctuXPosInCtus == tileXPosInCtus + 1 && pEncLib->getEntropyCodingSyncEnabledFlag() ) + // Store probabilities of first CTU in line into buffer - used only if wavefront-parallel-processing is enabled. + if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag() ) { pEncLib->m_entropyCodingSyncContextState = pCABACWriter->getCtx(); } -#endif -#if ENABLE_WPP_PARALLELISM - if( ctuXPosInCtus == 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) ) - { - pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus] = pCABACWriter->getCtx(); - } -#endif -#if !ENABLE_WPP_PARALLELISM int actualBits = int(cs.fracBits >> SCALE_BITS); actualBits -= (int)m_uiPicTotalBits; -#endif if ( pCfg->getUseRateCtrl() ) { -#if ENABLE_WPP_PARALLELISM - int actualBits = int( cs.fracBits >> SCALE_BITS ); - actualBits -= (int)m_uiPicTotalBits; -#endif int actualQP = g_RCInvalidQPValue; double actualLambda = pRdCost->getLambda(); int numberOfEffectivePixels = 0; @@ -1910,13 +1617,8 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons } #endif -#if !ENABLE_WPP_PARALLELISM m_uiPicTotalBits += actualBits; m_uiPicDist = cs.dist; -#endif -#if ENABLE_WPP_PARALLELISM - pcPic->scheduler.setReady( ctuXPosInCtus, ctuYPosInCtus ); -#endif } // this is wpp exclusive section @@ -1930,20 +1632,7 @@ void EncSlice::encodeSlice ( Picture* pcPic, OutputBitstream* pcSubstreams, ui { Slice *const pcSlice = pcPic->slices[getSliceSegmentIdx()]; -#if HEVC_TILES_WPP - const TileMap& tileMap = *pcPic->tileMap; -#endif -#if HEVC_DEPENDENT_SLICES - const uint32_t startCtuTsAddr = pcSlice->getSliceSegmentCurStartCtuTsAddr(); - const uint32_t boundingCtuTsAddr = pcSlice->getSliceSegmentCurEndCtuTsAddr(); - const bool depSliceSegmentsEnabled = pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag(); -#else - const uint32_t startCtuTsAddr = pcSlice->getSliceCurStartCtuTsAddr(); - const uint32_t boundingCtuTsAddr = 
pcSlice->getSliceCurEndCtuTsAddr(); -#endif -#if HEVC_TILES_WPP const bool wavefrontsEnabled = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag(); -#endif // setup coding structure @@ -1954,65 +1643,18 @@ void EncSlice::encodeSlice ( Picture* pcPic, OutputBitstream* pcSubstreams, ui DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", pcSlice->getPOC() ); -#if HEVC_DEPENDENT_SLICES - if (depSliceSegmentsEnabled) - { -#if HEVC_TILES_WPP - // modify initial contexts with previous slice segment if this is a dependent slice. - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr ); - const uint32_t currentTileIdx = tileMap.getTileIdxMap(ctuRsAddr); - const Tile& currentTile = tileMap.tiles[currentTileIdx]; - const uint32_t firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - - if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != firstCtuRsAddrOfTile ) - { - if( currentTile.getTileWidthInCtus() >= 2 || !wavefrontsEnabled ) - { - m_CABACWriter->getCtx() = m_lastSliceSegmentEndContextState; - } - } -#else - // KJS: not sure if this works (but both dep slices and tiles shall be removed in VTM, so this code should not be used) - if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != startCtuTsAddr ) - { - if( pcPic->cs->pcv->widthInCtus >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() ) - { - m_CABACWriter->getCtx() = m_lastSliceSegmentEndContextState; - } -#endif - } - - if( !pcSlice->getDependentSliceSegmentFlag() ) - { -#endif pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp(); -#if HEVC_DEPENDENT_SLICES - } -#endif const PreCalcValues& pcv = *cs.pcv; const uint32_t widthInCtus = pcv.widthInCtus; + uint32_t uiSubStrm = 0; - // for every CTU in the slice segment... - - for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ ) + // for every CTU in the slice... 
+ for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ ) { -#if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap(ctuTsAddr); - const Tile& currentTile = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)]; - const uint32_t firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr(); - const uint32_t tileXPosInCtus = firstCtuRsAddrOfTile % widthInCtus; - const uint32_t tileYPosInCtus = firstCtuRsAddrOfTile / widthInCtus; -#else - const uint32_t ctuRsAddr = ctuTsAddr; -#endif + const uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx ); const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus; const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus; -#if HEVC_TILES_WPP - const uint32_t uiSubStrm = tileMap.getSubstreamForCtuAddr(ctuRsAddr, true, pcSlice); -#else - const uint32_t uiSubStrm = 0; -#endif DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) ); @@ -2020,82 +1662,66 @@ void EncSlice::encodeSlice ( Picture* pcPic, OutputBitstream* pcSubstreams, ui const UnitArea ctuArea (cs.area.chromaFormat, Area(pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight)); m_CABACWriter->initBitstream( &pcSubstreams[uiSubStrm] ); -#if HEVC_TILES_WPP // set up CABAC contexts' state for this CTU - if (ctuRsAddr == firstCtuRsAddrOfTile) + if ( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && cs.pps->ctuIsTileRowBd( ctuYPosInCtus ) ) { - if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset + if (ctuIdx != 0) // if it is the first CTU, then the entropy coder has already been reset { m_CABACWriter->initCtxModels( *pcSlice ); + cs.resetPrevPLT(cs.prevPLT); } } - else if (ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled) + else if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && wavefrontsEnabled) { - // Synchronize cabac probabilities with upper-right CTU if it's available and at the start of a line. 
- if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset + // Synchronize cabac probabilities with upper CTU if it's available and at the start of a line. + if (ctuIdx != 0) // if it is the first CTU, then the entropy coder has already been reset { m_CABACWriter->initCtxModels( *pcSlice ); + cs.resetPrevPLT(cs.prevPLT); } - if( cs.getCURestricted( pos.offset( pcv.maxCUWidth, -1 ), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) ) + if( cs.getCURestricted( pos.offset( 0, -1 ), pos, pcSlice->getIndependentSliceIdx(), cs.pps->getTileIdx( pos ), CH_L ) ) { - // Top-right is available, so use it. + // Top is available, so use it. m_CABACWriter->getCtx() = m_entropyCodingSyncContextState; } } -#endif - bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr; - if( updateGbiCodingOrder ) + bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0; + if( updateBcwCodingOrder ) { - resetGbiCodingOrder(false, cs); + resetBcwCodingOrder(false, cs); } m_CABACWriter->coding_tree_unit( cs, ctuArea, pcPic->m_prevQP, ctuRsAddr ); -#if HEVC_TILES_WPP - // store probabilities of second CTU in line into buffer - if( ctuXPosInCtus == tileXPosInCtus + 1 && wavefrontsEnabled ) + // store probabilities of first CTU in line into buffer + if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && wavefrontsEnabled ) { m_entropyCodingSyncContextState = m_CABACWriter->getCtx(); } -#endif // terminate the sub-stream, if required (end of slice-segment, end of tile, end of wavefront-CTU-row): -#if HEVC_TILES_WPP - if( ctuTsAddr + 1 == boundingCtuTsAddr || - ( ctuXPosInCtus + 1 == tileXPosInCtus + currentTile.getTileWidthInCtus () && - ( ctuYPosInCtus + 1 == tileYPosInCtus + currentTile.getTileHeightInCtus() || wavefrontsEnabled ) - ) - ) -#else - if( ctuTsAddr + 1 == boundingCtuTsAddr ) -#endif + bool isLastCTUsinSlice = ctuIdx == pcSlice->getNumCtuInSlice()-1; + bool 
isLastCTUinTile = !isLastCTUsinSlice && cs.pps->getTileIdx( ctuRsAddr ) != cs.pps->getTileIdx( pcSlice->getCtuAddrInSlice( ctuIdx + 1 ) ); + bool isLastCTUinWPP = !isLastCTUsinSlice && !isLastCTUinTile && wavefrontsEnabled && cs.pps->ctuIsTileColBd( pcSlice->getCtuAddrInSlice( ctuIdx + 1 ) % cs.pps->getPicWidthInCtu() ); + if (isLastCTUsinSlice || isLastCTUinTile || isLastCTUinWPP ) // this the the last CTU of the slice, tile, or WPP { - m_CABACWriter->end_of_slice(); + m_CABACWriter->end_of_slice(); // end_of_slice_one_bit, end_of_tile_one_bit, or end_of_subset_one_bit // Byte-alignment in slice_data() when new tile pcSubstreams[uiSubStrm].writeByteAlignment(); - // write sub-stream size - if( ctuTsAddr + 1 != boundingCtuTsAddr ) + if (!isLastCTUsinSlice) //Byte alignment only when it is not the last substream in the slice { - pcSlice->addSubstreamSize( (pcSubstreams[uiSubStrm].getNumberOfWrittenBits() >> 3) + pcSubstreams[uiSubStrm].countStartCodeEmulations() ); + // write sub-stream size + pcSlice->addSubstreamSize((pcSubstreams[uiSubStrm].getNumberOfWrittenBits() >> 3) + pcSubstreams[uiSubStrm].countStartCodeEmulations()); } + uiSubStrm++; } } // CTU-loop -#if HEVC_DEPENDENT_SLICES - if( depSliceSegmentsEnabled ) - { - m_lastSliceSegmentEndContextState = m_CABACWriter->getCtx();//ctx end of dep.slice - } -#endif -#if HEVC_DEPENDENT_SLICES - if (pcSlice->getPPS()->getCabacInitPresentFlag() && !pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag()) -#else if(pcSlice->getPPS()->getCabacInitPresentFlag()) -#endif { m_encCABACTableIdx = m_CABACWriter->getCtxInitId( *pcSlice ); } @@ -2107,160 +1733,6 @@ void EncSlice::encodeSlice ( Picture* pcPic, OutputBitstream* pcSubstreams, ui } -#if HEVC_TILES_WPP -void EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, bool &haveReachedTileBoundary, - Picture* pcPic, const int sliceMode, const int sliceArgument) -#else -void 
EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, - Picture* pcPic, const int sliceMode, const int sliceArgument) -#endif -{ -#if HEVC_TILES_WPP - Slice* pcSlice = pcPic->slices[getSliceSegmentIdx()]; - const TileMap& tileMap = *( pcPic->tileMap ); - const PPS &pps = *( pcSlice->getPPS() ); -#endif - const uint32_t numberOfCtusInFrame = pcPic->cs->pcv->sizeInCtus; - boundingCtuTSAddrSlice=0; -#if HEVC_TILES_WPP - haveReachedTileBoundary=false; -#endif - - switch (sliceMode) - { - case FIXED_NUMBER_OF_CTU: - { - uint32_t ctuAddrIncrement = sliceArgument; - boundingCtuTSAddrSlice = ((startCtuTSAddrSlice + ctuAddrIncrement) < numberOfCtusInFrame) ? (startCtuTSAddrSlice + ctuAddrIncrement) : numberOfCtusInFrame; - } - break; - case FIXED_NUMBER_OF_BYTES: - boundingCtuTSAddrSlice = numberOfCtusInFrame; // This will be adjusted later if required. - break; -#if HEVC_TILES_WPP - case FIXED_NUMBER_OF_TILES: - { - const uint32_t tileIdx = tileMap.getTileIdxMap( tileMap.getCtuTsToRsAddrMap(startCtuTSAddrSlice) ); - const uint32_t tileTotalCount = (pps.getNumTileColumnsMinus1()+1) * (pps.getNumTileRowsMinus1()+1); - uint32_t ctuAddrIncrement = 0; - - for(uint32_t tileIdxIncrement = 0; tileIdxIncrement < sliceArgument; tileIdxIncrement++) - { - if((tileIdx + tileIdxIncrement) < tileTotalCount) - { - uint32_t tileWidthInCtus = tileMap.tiles[tileIdx + tileIdxIncrement].getTileWidthInCtus(); - uint32_t tileHeightInCtus = tileMap.tiles[tileIdx + tileIdxIncrement].getTileHeightInCtus(); - ctuAddrIncrement += (tileWidthInCtus * tileHeightInCtus); - } - } - - boundingCtuTSAddrSlice = ((startCtuTSAddrSlice + ctuAddrIncrement) < numberOfCtusInFrame) ? (startCtuTSAddrSlice + ctuAddrIncrement) : numberOfCtusInFrame; - } - break; -#endif - default: - boundingCtuTSAddrSlice = numberOfCtusInFrame; - break; - } - -#if HEVC_TILES_WPP - // Adjust for tiles and wavefronts. 
- const bool wavefrontsAreEnabled = pps.getEntropyCodingSyncEnabledFlag(); - - if ((sliceMode == FIXED_NUMBER_OF_CTU || sliceMode == FIXED_NUMBER_OF_BYTES) && - (pps.getNumTileRowsMinus1() > 0 || pps.getNumTileColumnsMinus1() > 0)) - { - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap(startCtuTSAddrSlice); - const uint32_t startTileIdx = tileMap.getTileIdxMap(ctuRsAddr); - const Tile& startingTile = tileMap.tiles[startTileIdx]; - const uint32_t tileStartTsAddr = tileMap.getCtuRsToTsAddrMap(startingTile.getFirstCtuRsAddr()); - const uint32_t tileStartWidth = startingTile.getTileWidthInCtus(); - const uint32_t tileStartHeight = startingTile.getTileHeightInCtus(); - const uint32_t tileLastTsAddr_excl = tileStartTsAddr + tileStartWidth*tileStartHeight; - const uint32_t tileBoundingCtuTsAddrSlice = tileLastTsAddr_excl; - const uint32_t ctuColumnOfStartingTile = ((startCtuTSAddrSlice-tileStartTsAddr)%tileStartWidth); - if (wavefrontsAreEnabled && ctuColumnOfStartingTile!=0) - { - // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row - const uint32_t numberOfCTUsToEndOfRow = tileStartWidth - ctuColumnOfStartingTile; - const uint32_t wavefrontTileBoundingCtuAddrSlice = startCtuTSAddrSlice + numberOfCTUsToEndOfRow; - if (wavefrontTileBoundingCtuAddrSlice < boundingCtuTSAddrSlice) - { - boundingCtuTSAddrSlice = wavefrontTileBoundingCtuAddrSlice; - } - } - - if (tileBoundingCtuTsAddrSlice < boundingCtuTSAddrSlice) - { - boundingCtuTSAddrSlice = tileBoundingCtuTsAddrSlice; - haveReachedTileBoundary = true; - } - } - else if ((sliceMode == FIXED_NUMBER_OF_CTU || sliceMode == FIXED_NUMBER_OF_BYTES) && wavefrontsAreEnabled && ((startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) != 0)) - { - // Adjust for wavefronts (no tiles). 
- // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row - boundingCtuTSAddrSlice = std::min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus)); - } -#endif -} - -/** Determines the starting and bounding CTU address of current slice / dependent slice - * \param [out] startCtuTsAddr - * \param [out] boundingCtuTsAddr - * \param [in] pcPic - - * Updates startCtuTsAddr, boundingCtuTsAddr with appropriate CTU address - */ -void EncSlice::xDetermineStartAndBoundingCtuTsAddr ( uint32_t& startCtuTsAddr, uint32_t& boundingCtuTsAddr, Picture* pcPic ) -{ - Slice* pcSlice = pcPic->slices[getSliceSegmentIdx()]; - - // Non-dependent slice - uint32_t startCtuTsAddrSlice = pcSlice->getSliceCurStartCtuTsAddr(); -#if HEVC_TILES_WPP - bool haveReachedTileBoundarySlice = false; -#endif - uint32_t boundingCtuTsAddrSlice; -#if HEVC_TILES_WPP - calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSlice, boundingCtuTsAddrSlice, haveReachedTileBoundarySlice, pcPic, - m_pcCfg->getSliceMode(), m_pcCfg->getSliceArgument()); -#else - calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSlice, boundingCtuTsAddrSlice, pcPic, - m_pcCfg->getSliceMode(), m_pcCfg->getSliceArgument()); -#endif - pcSlice->setSliceCurEndCtuTsAddr( boundingCtuTsAddrSlice ); - pcSlice->setSliceCurStartCtuTsAddr( startCtuTsAddrSlice ); - -#if HEVC_DEPENDENT_SLICES - // Dependent slice - uint32_t startCtuTsAddrSliceSegment = pcSlice->getSliceSegmentCurStartCtuTsAddr(); -#if HEVC_TILES_WPP - bool haveReachedTileBoundarySliceSegment = false; -#endif - uint32_t boundingCtuTsAddrSliceSegment; -#if HEVC_TILES_WPP - calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSliceSegment, boundingCtuTsAddrSliceSegment, haveReachedTileBoundarySliceSegment, pcPic, - m_pcCfg->getSliceSegmentMode(), m_pcCfg->getSliceSegmentArgument()); -#else - calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSliceSegment, 
boundingCtuTsAddrSliceSegment, pcPic, - m_pcCfg->getSliceSegmentMode(), m_pcCfg->getSliceSegmentArgument()); -#endif - if (boundingCtuTsAddrSliceSegment>boundingCtuTsAddrSlice) - { - boundingCtuTsAddrSliceSegment = boundingCtuTsAddrSlice; - } - pcSlice->setSliceSegmentCurEndCtuTsAddr( boundingCtuTsAddrSliceSegment ); - pcSlice->setSliceSegmentCurStartCtuTsAddr(startCtuTsAddrSliceSegment); - - // Make a joint decision based on reconstruction and dependent slice bounds - startCtuTsAddr = std::max(startCtuTsAddrSlice, startCtuTsAddrSliceSegment); - boundingCtuTsAddr = boundingCtuTsAddrSliceSegment; -#else - startCtuTsAddr = startCtuTsAddrSlice; - boundingCtuTsAddr = boundingCtuTsAddrSlice; -#endif -} double EncSlice::xGetQPValueAccordingToLambda ( double lambda ) { diff --git a/source/Lib/EncoderLib/EncSlice.h b/source/Lib/EncoderLib/EncSlice.h index 3b802180ef35b0b7902c35a3710e2374bfc1c54c..ed88068075c5a87db40fb86e6750f62e394f6937 100644 --- a/source/Lib/EncoderLib/EncSlice.h +++ b/source/Lib/EncoderLib/EncSlice.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,33 +90,20 @@ private: std::vector<int> m_viRdPicQp; ///< array of picture QP candidates (int-type) RateCtrl* m_pcRateCtrl; ///< Rate control manager uint32_t m_uiSliceSegmentIdx; -#if HEVC_DEPENDENT_SLICES - Ctx m_lastSliceSegmentEndContextState; ///< context storage for state at the end of the previous slice-segment (used for dependent slices only). 
-#endif -#if HEVC_TILES_WPP Ctx m_entropyCodingSyncContextState; ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row -#endif SliceType m_encCABACTableIdx; -#if SHARP_LUMA_DELTA_QP +#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU int m_gopID; #endif -#if SHARP_LUMA_DELTA_QP public: - int getGopId() const { return m_gopID; } - double calculateLambda( const Slice* slice, const int GOPid, const int depth, const double refQP, const double dQP, int &iQP ); - void setUpLambda( Slice* slice, const double dLambda, int iQP ); - -private: + double initializeLambda(const Slice* slice, const int GOPid, const int refQP, const double dQP); // called by calculateLambda() and updateLambda() +#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU + int getGopId() const { return m_gopID; } + double calculateLambda( const Slice* slice, const int GOPid, const double refQP, const double dQP, int &iQP ); #endif -#if HEVC_TILES_WPP - void calculateBoundingCtuTsAddrForSlice( uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, bool &haveReachedTileBoundary, Picture* pcPic, const int sliceMode, const int sliceArgument ); -#else - void calculateBoundingCtuTsAddrForSlice( uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, Picture* pcPic, const int sliceMode, const int sliceArgument ); -#endif - + void setUpLambda( Slice* slice, const double dLambda, int iQP ); -public: #if ENABLE_QPA int m_adaptedLumaQP; @@ -142,17 +129,14 @@ public: void calCostSliceI ( Picture* pcPic ); void encodeSlice ( Picture* pcPic, OutputBitstream* pcSubstreams, uint32_t &numBinsCoded ); -#if ENABLE_WPP_PARALLELISM - static -#endif - void encodeCtus ( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pcEncLib ); + void encodeCtus ( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, EncLib* pcEncLib ); void checkDisFracMmvd ( Picture* pcPic, 
uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr ); + void setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma ); // misc. functions void setSearchRange ( Slice* pcSlice ); ///< set ME range adaptively EncCu* getCUEncoder () { return m_pcCuEncoder; } ///< CU encoder - void xDetermineStartAndBoundingCtuTsAddr ( uint32_t& startCtuTsAddr, uint32_t& boundingCtuTsAddr, Picture* pcPic ); uint32_t getSliceSegmentIdx () { return m_uiSliceSegmentIdx; } void setSliceSegmentIdx (uint32_t i) { m_uiSliceSegmentIdx = i; } diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c9c74336ff102cc7074193b3fbf0262267fea1ab --- /dev/null +++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp @@ -0,0 +1,626 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** \file EncTemporalFilter.cpp +\brief EncTemporalFilter class +*/ + +#include "EncTemporalFilter.h" +#include <math.h> + + +// ==================================================================================================================== +// Constructor / destructor / initialization / destroy +// ==================================================================================================================== + +const int EncTemporalFilter::m_range = 2; +const double EncTemporalFilter::m_chromaFactor = 0.55; +const double EncTemporalFilter::m_sigmaMultiplier = 9.0; +const double EncTemporalFilter::m_sigmaZeroPoint = 10.0; +const int EncTemporalFilter::m_motionVectorFactor = 16; +const int EncTemporalFilter::m_padding = 128; +const int EncTemporalFilter::m_interpolationFilter[16][8] = +{ + { 0, 0, 0, 64, 0, 0, 0, 0 }, //0 + { 0, 1, -3, 64, 4, -2, 0, 0 }, //1 -->--> + { 0, 1, -6, 62, 9, -3, 1, 0 }, //2 --> + { 0, 2, -8, 60, 14, -5, 1, 0 }, //3 -->--> + { 0, 2, -9, 57, 19, -7, 2, 0 }, //4 + { 0, 3, -10, 53, 24, -8, 2, 0 }, //5 -->--> + { 0, 3, -11, 50, 29, -9, 2, 0 }, //6 --> + { 0, 3, -11, 44, 35, -10, 3, 0 }, //7 -->--> + { 0, 1, -7, 38, 38, -7, 1, 0 }, //8 + { 0, 3, -10, 
35, 44, -11, 3, 0 }, //9 -->--> + { 0, 2, -9, 29, 50, -11, 3, 0 }, //10--> + { 0, 2, -8, 24, 53, -10, 3, 0 }, //11-->--> + { 0, 2, -7, 19, 57, -9, 2, 0 }, //12 + { 0, 1, -5, 14, 60, -8, 2, 0 }, //13-->--> + { 0, 1, -3, 9, 62, -6, 1, 0 }, //14--> + { 0, 0, -2, 4, 64, -3, 1, 0 } //15-->--> +}; + +const double EncTemporalFilter::m_refStrengths[3][2] = +{ // abs(POC offset) + // 1, 2 + {0.85, 0.60}, // m_range * 2 + {1.20, 1.00}, // m_range + {0.30, 0.30} // otherwise +}; + +EncTemporalFilter::EncTemporalFilter() : + m_FrameSkip(0), + m_chromaFormatIDC(NUM_CHROMA_FORMAT), + m_sourceWidth(0), + m_sourceHeight(0), + m_QP(0), + m_clipInputVideoToRec709Range(false), + m_inputColourSpaceConvert(NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS) +{} + +void EncTemporalFilter::init(const int frameSkip, + const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], + const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], + const int internalBitDepth[MAX_NUM_CHANNEL_TYPE], + const int width, + const int height, + const int *pad, + const bool rec709, + const std::string &filename, + const ChromaFormat inputChromaFormatIDC, + const InputColourSpaceConversion colorSpaceConv, + const int qp, + const std::map<int, double> &temporalFilterStrengths, + const bool gopBasedTemporalFilterFutureReference) +{ + m_FrameSkip = frameSkip; + for (int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) + { + m_inputBitDepth[i] = inputBitDepth[i]; + m_MSBExtendedBitDepth[i] = msbExtendedBitDepth[i]; + m_internalBitDepth[i] = internalBitDepth[i]; + } + + m_sourceWidth = width; + m_sourceHeight = height; + for (int i = 0; i < 2; i++) + { + m_pad[i] = pad[i]; + } + m_clipInputVideoToRec709Range = rec709; + m_inputFileName = filename; + m_chromaFormatIDC = inputChromaFormatIDC; + m_inputColourSpaceConvert = colorSpaceConv; + m_area = Area(0, 0, width, height); + m_QP = qp; + m_temporalFilterStrengths = temporalFilterStrengths; + m_gopBasedTemporalFilterFutureReference = gopBasedTemporalFilterFutureReference; +} + +// 
==================================================================================================================== +// Public member functions +// ==================================================================================================================== + +bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc) +{ + bool isFilterThisFrame = false; + if (m_QP >= 17) // disable filter for QP < 17 + { + for (map<int, double>::iterator it = m_temporalFilterStrengths.begin(); it != m_temporalFilterStrengths.end(); ++it) + { + int filteredFrame = it->first; + if (receivedPoc % filteredFrame == 0) + { + isFilterThisFrame = true; + break; + } + } + } + + if (isFilterThisFrame) + { + int offset = m_FrameSkip; + VideoIOYuv yuvFrames; + yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth); + yuvFrames.skipFrames(std::max(offset + receivedPoc - m_range, 0), m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC); + + + std::deque<TemporalFilterSourcePicInfo> srcFrameInfo; + + int firstFrame = receivedPoc + offset - m_range; + int lastFrame = receivedPoc + offset + m_range; + if (!m_gopBasedTemporalFilterFutureReference) + { + lastFrame = receivedPoc + offset - 1; + } + int origOffset = -m_range; + + // subsample original picture so it only needs to be done once + PelStorage origPadded; + + origPadded.create(m_chromaFormatIDC, m_area, 0, m_padding); + origPadded.copyFrom(*orgPic); + origPadded.extendBorderPel(m_padding, m_padding); + + PelStorage origSubsampled2; + PelStorage origSubsampled4; + + subsampleLuma(origPadded, origSubsampled2); + subsampleLuma(origSubsampled2, origSubsampled4); + + // determine motion vectors + for (int poc = firstFrame; poc <= lastFrame; poc++) + { + if (poc < 0) + { + origOffset++; + continue; // frame not available + } + else if (poc == offset + receivedPoc) + { // hop over frame that will be filtered + yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], 
m_sourceHeight - m_pad[1], m_chromaFormatIDC); + origOffset++; + continue; + } + srcFrameInfo.push_back(TemporalFilterSourcePicInfo()); + TemporalFilterSourcePicInfo &srcPic=srcFrameInfo.back(); + + PelStorage dummyPicBufferTO; // Only used temporary in yuvFrames.read + srcPic.picBuffer.create(m_chromaFormatIDC, m_area, 0, m_padding); + dummyPicBufferTO.create(m_chromaFormatIDC, m_area, 0, m_padding); + if (!yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIDC, m_clipInputVideoToRec709Range)) + { + return false; // eof or read fail + } + srcPic.picBuffer.extendBorderPel(m_padding, m_padding); + srcPic.mvs.allocate(m_sourceWidth / 4, m_sourceHeight / 4); + + motionEstimation(srcPic.mvs, origPadded, srcPic.picBuffer, origSubsampled2, origSubsampled4); + srcPic.origOffset = origOffset; + origOffset++; + } + + // filter + PelStorage newOrgPic; + newOrgPic.create(m_chromaFormatIDC, m_area, 0, m_padding); + double overallStrength = -1.0; + for (map<int, double>::iterator it = m_temporalFilterStrengths.begin(); it != m_temporalFilterStrengths.end(); ++it) + { + int frame = it->first; + double strength = it->second; + if (receivedPoc % frame == 0) + { + overallStrength = strength; + } + } + + bilateralFilter(origPadded, srcFrameInfo, newOrgPic, overallStrength); + + // move filtered to orgPic + orgPic->copyFrom(newOrgPic); + + yuvFrames.close(); + return true; + } + return false; +} + +// ==================================================================================================================== +// Private member functions +// ==================================================================================================================== + +void EncTemporalFilter::subsampleLuma(const PelStorage &input, PelStorage &output, const int factor) const +{ + const int newWidth = input.Y().width / factor; + const int newHeight = input.Y().height / factor; + output.create(m_chromaFormatIDC, Area(0, 0, newWidth, 
newHeight), 0, m_padding); + + const Pel* srcRow = input.Y().buf; + const int srcStride = input.Y().stride; + Pel *dstRow = output.Y().buf; + const int dstStride = output.Y().stride; + + for (int y = 0; y < newHeight; y++, srcRow+=factor*srcStride, dstRow+=dstStride) + { + const Pel *inRow = srcRow; + const Pel *inRowBelow = srcRow+srcStride; + Pel *target = dstRow; + + for (int x = 0; x < newWidth; x++) + { + target[x] = (inRow[0] + inRowBelow[0] + inRow[1] + inRowBelow[1] + 2) >> 2; + inRow += 2; + inRowBelow += 2; + } + } + output.extendBorderPel(m_padding, m_padding); +} + +int EncTemporalFilter::motionErrorLuma(const PelStorage &orig, + const PelStorage &buffer, + const int x, + const int y, + int dx, + int dy, + const int bs, + const int besterror = 8 * 8 * 1024 * 1024) const +{ + const Pel* origOrigin = orig.Y().buf; + const int origStride = orig.Y().stride; + const Pel *buffOrigin = buffer.Y().buf; + const int buffStride = buffer.Y().stride; + + int error = 0;// dx * 10 + dy * 10; + if (((dx | dy) & 0xF) == 0) + { + dx /= m_motionVectorFactor; + dy /= m_motionVectorFactor; + for (int y1 = 0; y1 < bs; y1++) + { + const Pel* origRowStart = origOrigin + (y+y1)*origStride + x; + const Pel* bufferRowStart = buffOrigin + (y+y1+dy)*buffStride + (x+dx); + for (int x1 = 0; x1 < bs; x1 += 2) + { + int diff = origRowStart[x1] - bufferRowStart[x1]; + error += diff * diff; + diff = origRowStart[x1 + 1] - bufferRowStart[x1 + 1]; + error += diff * diff; + } + if (error > besterror) + { + return error; + } + } + } + else + { + const int *xFilter = m_interpolationFilter[dx & 0xF]; + const int *yFilter = m_interpolationFilter[dy & 0xF]; + int tempArray[64 + 8][64]; + + int sum, base; + for (int y1 = 1; y1 < bs + 7; y1++) + { + const int yOffset = y + y1 + (dy >> 4) - 3; + const Pel *sourceRow = buffOrigin + (yOffset)*buffStride + 0; + for (int x1 = 0; x1 < bs; x1++) + { + sum = 0; + base = x + x1 + (dx >> 4) - 3; + const Pel *rowStart = sourceRow + base; + + sum += 
xFilter[1] * rowStart[1]; + sum += xFilter[2] * rowStart[2]; + sum += xFilter[3] * rowStart[3]; + sum += xFilter[4] * rowStart[4]; + sum += xFilter[5] * rowStart[5]; + sum += xFilter[6] * rowStart[6]; + + tempArray[y1][x1] = sum; + } + } + + const Pel maxSampleValue = (1<<m_internalBitDepth[CHANNEL_TYPE_LUMA])-1; + for (int y1 = 0; y1 < bs; y1++) + { + const Pel *origRow = origOrigin + (y+y1)*origStride + 0; + for (int x1 = 0; x1 < bs; x1++) + { + sum = 0; + sum += yFilter[1] * tempArray[y1 + 1][x1]; + sum += yFilter[2] * tempArray[y1 + 2][x1]; + sum += yFilter[3] * tempArray[y1 + 3][x1]; + sum += yFilter[4] * tempArray[y1 + 4][x1]; + sum += yFilter[5] * tempArray[y1 + 5][x1]; + sum += yFilter[6] * tempArray[y1 + 6][x1]; + + sum = (sum + (1 << 11)) >> 12; + sum = sum < 0 ? 0 : (sum > maxSampleValue ? maxSampleValue : sum); + + error += (sum - origRow[x + x1]) * (sum - origRow[x + x1]); + } + if (error > besterror) + { + return error; + } + } + } + return error; +} + +void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize, + const Array2D<MotionVector> *previous, const int factor, const bool doubleRes) const +{ + int range = 5; + const int stepSize = blockSize; + + const int origWidth = orig.Y().width; + const int origHeight = orig.Y().height; + + for (int blockY = 0; blockY + blockSize < origHeight; blockY += stepSize) + { + for (int blockX = 0; blockX + blockSize < origWidth; blockX += stepSize) + { + MotionVector best; + + if (previous == NULL) + { + range = 8; + } + else + { + for (int py = -2; py <= 2; py++) + { + int testy = blockY / (2 * blockSize) + py; + for (int px = -2; px <= 2; px++) + { + int testx = blockX / (2 * blockSize) + px; + if ((testx >= 0) && (testx < origWidth / (2 * blockSize)) && (testy >= 0) && (testy < origHeight / (2 * blockSize))) + { + MotionVector old = previous->get(testx, testy); + int error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * 
factor, old.y * factor, blockSize, best.error); + if (error < best.error) + { + best.set(old.x * factor, old.y * factor, error); + } + } + } + } + } + MotionVector prevBest = best; + for (int y2 = prevBest.y / m_motionVectorFactor - range; y2 <= prevBest.y / m_motionVectorFactor + range; y2++) + { + for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error); + if (error < best.error) + { + best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, error); + } + } + } + if (doubleRes) + { // merge into one loop, probably with precision array (here [12, 3] or maybe [4, 1]) with setable number of iterations + prevBest = best; + int doubleRange = 3 * 4; + for (int y2 = prevBest.y - doubleRange; y2 <= prevBest.y + doubleRange; y2 += 4) + { + for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2 += 4) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error); + if (error < best.error) + { + best.set(x2, y2, error); + } + + } + } + + prevBest = best; + doubleRange = 3; + for (int y2 = prevBest.y - doubleRange; y2 <= prevBest.y + doubleRange; y2++) + { + for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2++) + { + int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error); + if (error < best.error) + { + best.set(x2, y2, error); + } + + } + } + + } + mvs.get(blockX / stepSize, blockY / stepSize) = best; + } + } +} + +void EncTemporalFilter::motionEstimation(Array2D<MotionVector> &mv, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const +{ + const int width = m_sourceWidth; + const int height = m_sourceHeight; + Array2D<MotionVector> mv_0(width / 16, height / 16); + Array2D<MotionVector> 
mv_1(width / 16, height / 16); + Array2D<MotionVector> mv_2(width / 16, height / 16); + + PelStorage bufferSub2; + PelStorage bufferSub4; + + subsampleLuma(buffer, bufferSub2); + subsampleLuma(bufferSub2, bufferSub4); + + motionEstimationLuma(mv_0, origSubsampled4, bufferSub4, 16); + motionEstimationLuma(mv_1, origSubsampled2, bufferSub2, 16, &mv_0, 2); + motionEstimationLuma(mv_2, orgPic, buffer, 16, &mv_1, 2); + + motionEstimationLuma(mv, orgPic, buffer, 8, &mv_2, 1, true); +} + +void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const +{ + static const int lumaBlockSize=8; + + for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++) + { + const ComponentID compID=(ComponentID)c; + const int csx=getComponentScaleX(compID, m_chromaFormatIDC); + const int csy=getComponentScaleY(compID, m_chromaFormatIDC); + const int blockSizeX = lumaBlockSize>>csx; + const int blockSizeY = lumaBlockSize>>csy; + const int height = input.bufs[c].height; + const int width = input.bufs[c].width; + + const Pel maxValue = (1<<m_internalBitDepth[toChannelType(compID)])-1; + + const Pel *srcImage = input.bufs[c].buf; + const int srcStride = input.bufs[c].stride; + + Pel *dstImage = output.bufs[c].buf; + int dstStride = output.bufs[c].stride; + + for (int y = 0, blockNumY = 0; y + blockSizeY <= height; y += blockSizeY, blockNumY++) + { + for (int x = 0, blockNumX = 0; x + blockSizeX <= width; x += blockSizeX, blockNumX++) + { + const MotionVector &mv = mvs.get(blockNumX,blockNumY); + const int dx = mv.x >> csx ; + const int dy = mv.y >> csy ; + const int xInt = mv.x >> (4+csx) ; + const int yInt = mv.y >> (4+csy) ; + + const int *xFilter = m_interpolationFilter[dx & 0xf]; + const int *yFilter = m_interpolationFilter[dy & 0xf]; // will add 6 bit. 
+ const int numFilterTaps=7; + const int centreTapOffset=3; + + int tempArray[lumaBlockSize + numFilterTaps][lumaBlockSize]; + + for (int by = 1; by < blockSizeY + numFilterTaps; by++) + { + const int yOffset = y + by + yInt - centreTapOffset; + const Pel *sourceRow = srcImage+yOffset*srcStride; + for (int bx = 0; bx < blockSizeX; bx++) + { + int base = x + bx + xInt - centreTapOffset; + const Pel *rowStart = sourceRow + base; + + int sum = 0; + sum += xFilter[1] * rowStart[1]; + sum += xFilter[2] * rowStart[2]; + sum += xFilter[3] * rowStart[3]; + sum += xFilter[4] * rowStart[4]; + sum += xFilter[5] * rowStart[5]; + sum += xFilter[6] * rowStart[6]; + + tempArray[by][bx] = sum; + } + } + + Pel *dstRow = dstImage+y*dstStride; + for (int by = 0; by < blockSizeY; by++, dstRow+=dstStride) + { + Pel *dstPel=dstRow+x; + for (int bx = 0; bx < blockSizeX; bx++, dstPel++) + { + int sum = 0; + + sum += yFilter[1] * tempArray[by + 1][bx]; + sum += yFilter[2] * tempArray[by + 2][bx]; + sum += yFilter[3] * tempArray[by + 3][bx]; + sum += yFilter[4] * tempArray[by + 4][bx]; + sum += yFilter[5] * tempArray[by + 5][bx]; + sum += yFilter[6] * tempArray[by + 6][bx]; + + sum = (sum + (1 << 11)) >> 12; + sum = sum < 0 ? 0 : (sum > maxValue ? 
maxValue : sum); + *dstPel = sum; + } + } + } + } + } +} + +void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic, + const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, + PelStorage &newOrgPic, + double overallStrength) const +{ + const int numRefs = int(srcFrameInfo.size()); + std::vector<PelStorage> correctedPics(numRefs); + for (int i = 0; i < numRefs; i++) + { + correctedPics[i].create(m_chromaFormatIDC, m_area, 0, m_padding); + applyMotion(srcFrameInfo[i].mvs, srcFrameInfo[i].picBuffer, correctedPics[i]); + } + + int refStrengthRow = 2; + if (numRefs == m_range*2) + { + refStrengthRow = 0; + } + else if (numRefs == m_range) + { + refStrengthRow = 1; + } + + const double lumaSigmaSq = (m_QP - m_sigmaZeroPoint) * (m_QP - m_sigmaZeroPoint) * m_sigmaMultiplier; + const double chromaSigmaSq = 30 * 30; + + for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++) + { + const ComponentID compID=(ComponentID)c; + const int height = orgPic.bufs[c].height; + const int width = orgPic.bufs[c].width; + const Pel *srcPelRow = orgPic.bufs[c].buf; + const int srcStride = orgPic.bufs[c].stride; + Pel *dstPelRow = newOrgPic.bufs[c].buf; + const int dstStride = newOrgPic.bufs[c].stride; + const double sigmaSq = isChroma(compID)? chromaSigmaSq : lumaSigmaSq; + const double weightScaling = overallStrength * (isChroma(compID) ? 
m_chromaFactor : 0.4); + const Pel maxSampleValue = (1<<m_internalBitDepth[toChannelType(compID)])-1; + const double bitDepthDiffWeighting=1024.0 / (maxSampleValue+1); + + for (int y = 0; y < height; y++, srcPelRow+=srcStride, dstPelRow+=dstStride) + { + const Pel *srcPel=srcPelRow; + Pel *dstPel=dstPelRow; + for (int x = 0; x < width; x++, srcPel++, dstPel++) + { + const int orgVal = (int) *srcPel; + double temporalWeightSum = 1.0; + double newVal = (double) orgVal; + for (int i = 0; i < numRefs; i++) + { + const Pel *pCorrectedPelPtr=correctedPics[i].bufs[c].buf+(y*correctedPics[i].bufs[c].stride+x); + const int refVal = (int) *pCorrectedPelPtr; + double diff = (double)(refVal - orgVal); + diff *= bitDepthDiffWeighting; + double diffSq = diff * diff; + const int index = std::min(1, std::abs(srcFrameInfo[i].origOffset) - 1); + const double weight = weightScaling * m_refStrengths[refStrengthRow][index] * exp(-diffSq / (2 * sigmaSq)); + newVal += weight * refVal; + temporalWeightSum += weight; + } + newVal /= temporalWeightSum; + Pel sampleVal = (Pel)round(newVal); + sampleVal=(sampleVal<0?0 : (sampleVal>maxSampleValue ? maxSampleValue : sampleVal)); + *dstPel = sampleVal; + } + } + } +} + +//! \} + diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h new file mode 100644 index 0000000000000000000000000000000000000000..b46b265dc783b548290155278be0aa3708ab5f41 --- /dev/null +++ b/source/Lib/EncoderLib/EncTemporalFilter.h @@ -0,0 +1,165 @@ +/* The copyright in this software is being made available under the BSD +* License, included below. This software may be subject to other third party +* and contributor rights, including patent rights, and no such rights are +* granted under this license. +* +* Copyright (c) 2010-2020, ITU/ISO/IEC +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** \file EncTemporalFilter.h +\brief EncTemporalFilter class (header) +*/ + +#ifndef __TEMPORAL_FILTER__ +#define __TEMPORAL_FILTER__ +#include "EncLib.h" +#include "CommonLib/Buffer.h" +#include <sstream> +#include <map> +#include <deque> + + +//! \ingroup EncoderLib +//! 
\{
+
+// Integer displacement (x, y) together with the matching error recorded for it.
+// A default-constructed vector has zero displacement and error INT_LEAST32_MAX.
+struct MotionVector
+{
+  int x, y;
+  int error;
+  MotionVector() : x(0), y(0), error(INT_LEAST32_MAX) {}
+  // Overwrites all three fields at once.
+  void set(int vectorX, int vectorY, int errorValue) { x = vectorX; y = vectorY; error = errorValue; }
+};
+
+// Row-major width x height grid of T kept in one contiguous std::vector;
+// element (x, y) lives at linear index y * width + x.
+template <class T>
+struct Array2D
+{
+private:
+  int m_width, m_height;
+  std::vector< T > v;
+public:
+  Array2D() : m_width(0), m_height(0), v() { }
+  Array2D(int width, int height, const T& value=T()) : m_width(0), m_height(0), v() { allocate(width, height, value); }
+
+  // Sets the grid dimensions and resizes the storage to width * height elements.
+  // std::vector::resize only initialises newly appended elements with 'value';
+  // elements kept from an earlier allocation retain their previous contents.
+  void allocate(int width, int height, const T& value=T())
+  {
+    assert(width >= 0 && height >= 0);
+    m_width=width;
+    m_height=height;
+    // Multiply in std::size_t: the int product can overflow before the cast.
+    v.resize(std::size_t(m_width) * std::size_t(m_height), value);
+  }
+
+  // Returns a reference to element (x, y); both coordinates must lie inside the grid.
+  T& get(int x, int y)
+  {
+    // The lower bounds are checked as well: a negative coordinate would index outside the storage.
+    assert(x >= 0 && y >= 0 && x < m_width && y < m_height);
+    return v[std::size_t(y) * std::size_t(m_width) + std::size_t(x)];
+  }
+
+  // Read-only overload of get().
+  const T& get(int x, int y) const
+  {
+    assert(x >= 0 && y >= 0 && x < m_width && y < m_height);
+    return v[std::size_t(y) * std::size_t(m_width) + std::size_t(x)];
+  }
+};
+
+// Bundles one picture buffer with its motion-vector grid and an integer offset.
+// NOTE(review): origOffset presumably is the distance to the picture being filtered - confirm against the .cpp.
+struct TemporalFilterSourcePicInfo
+{
+  TemporalFilterSourcePicInfo() : picBuffer(), mvs(), origOffset(0) { }
+  PelStorage            picBuffer;
+  Array2D<MotionVector> mvs;
+  int                   origOffset;
+};
+
+// ====================================================================================================================
+// Class definition
+// ====================================================================================================================
+
+// Declaration only: every member function below is defined outside this header.
+class EncTemporalFilter
+{
+public:
+  EncTemporalFilter();
+  ~EncTemporalFilter() {}
+
+  // NOTE(review): presumably stores the configuration in the matching m_* members below - confirm in the .cpp.
+  void init(const int frameSkip,
+            const int inputBitDepth[MAX_NUM_CHANNEL_TYPE],
+            const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE],
+            const int internalBitDepth[MAX_NUM_CHANNEL_TYPE],
+            const int width,
+            const int height,
+            const int *pad,
+            const bool rec709,
+            const std::string &filename,
+            const ChromaFormat inputChroma,
+            const InputColourSpaceConversion colorSpaceConv,
+            const int qp,
+            const std::map<int, double> &temporalFilterStrengths,
+            const bool gopBasedTemporalFilterFutureReference);
+
+  // NOTE(review): the meaning of the bool result is not visible from this header - confirm in the .cpp.
+  bool filter(PelStorage *orgPic, int frame);
+
+private:
+  // Private static member variables
+  static const int m_range;
+  static const double m_chromaFactor;
+  static const double m_sigmaMultiplier;
+  static const double m_sigmaZeroPoint;
+  static const int m_motionVectorFactor;
+  static const int m_padding;
+  static const int m_interpolationFilter[16][8];
+  static const double m_refStrengths[3][2];
+
+  // Private member variables
+  int m_FrameSkip;
+  std::string m_inputFileName;
+  int m_inputBitDepth[MAX_NUM_CHANNEL_TYPE];
+  int m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE];
+  int m_internalBitDepth[MAX_NUM_CHANNEL_TYPE];
+  ChromaFormat m_chromaFormatIDC;
+  int m_sourceWidth;
+  int m_sourceHeight;
+  int m_QP;
+  std::map<int, double> m_temporalFilterStrengths;
+  int m_pad[2];
+  bool m_clipInputVideoToRec709Range;
+  InputColourSpaceConversion m_inputColourSpaceConvert;
+  Area m_area;
+  bool m_gopBasedTemporalFilterFutureReference;
+
+  // Private functions
+  void subsampleLuma(const PelStorage &input, PelStorage &output, const int factor = 2) const;
+  int motionErrorLuma(const PelStorage &orig, const PelStorage &buffer, const int x, const int y, int dx, int dy, const int bs, const int besterror) const;
+  // 'previous' is optional; it defaults to a null pointer.
+  void motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int bs,
+      const Array2D<MotionVector> *previous=nullptr, const int factor = 1, const bool doubleRes = false) const;
+  void motionEstimation(Array2D<MotionVector> &mvs, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const;
+
+  void bilateralFilter(const PelStorage &orgPic, const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, double overallStrength) const;
+  void applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const;
+}; // END CLASS DEFINITION EncTemporalFilter
+
+  //! 
\} + + +#endif // __TEMPORAL_FILTER__ diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index dd85f3c41916f7b352698f6d45d73c6c28279c28..2a0f143d8303546ea456cb88082bb046efc50c11 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -112,8 +112,12 @@ InterSearch::InterSearch() m_affMVList = nullptr; m_affMVListSize = 0; m_affMVListIdx = 0; + m_uniMvList = nullptr; + m_uniMvListSize = 0; + m_uniMvListIdx = 0; m_histBestSbt = MAX_UCHAR; m_histBestMtsIdx = MAX_UCHAR; + } @@ -156,6 +160,13 @@ void InterSearch::destroy() } m_affMVListIdx = 0; m_affMVListSize = 0; + if (m_uniMvList) + { + delete[] m_uniMvList; + m_uniMvList = nullptr; + } + m_uniMvListIdx = 0; + m_uniMvListSize = 0; m_isInitialized = false; } @@ -198,7 +209,11 @@ void InterSearch::init( EncCfg* pcEncCfg, { CHECK(m_isInitialized, "Already initialized"); m_numBVs = 0; - m_numBV16s = 0; + for (int i = 0; i < IBC_NUM_CANDIDATES; i++) + { + m_defaultCachedBvs.m_bvCands[i].setZero(); + } + m_defaultCachedBvs.currCnt = 0; m_pcEncCfg = pcEncCfg; m_pcTrQuant = pcTrQuant; m_iSearchRange = iSearchRange; @@ -234,7 +249,7 @@ void InterSearch::init( EncCfg* pcEncCfg, } const ChromaFormat cform = pcEncCfg->getChromaFormatIdc(); - InterPrediction::init( pcRdCost, cform ); + InterPrediction::init( pcRdCost, cform, maxCUHeight ); for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ ) { @@ -251,6 +266,13 @@ void InterSearch::init( EncCfg* pcEncCfg, m_affMVList = new AffineMVInfo[m_affMVListMaxSize]; m_affMVListIdx = 0; m_affMVListSize = 0; + m_uniMvListMaxSize = 15; + if (!m_uniMvList) + { + m_uniMvList = new BlkUniMvInfo[m_uniMvListMaxSize]; + } + 
m_uniMvListIdx = 0; + m_uniMvListSize = 0; m_isInitialized = true; } @@ -276,9 +298,9 @@ void InterSearch::resetSavedAffineMotion() m_affineMotion.affine6ParaAvail = false; } -void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int gbiIdx ) +void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx ) { - if ( ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM ) + if ( ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM ) { for ( int i = 0; i < 2; i++ ) { @@ -291,7 +313,7 @@ void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2 m_affineMotion.affine6ParaAvail = true; } - if ( ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail ) && affineType == AFFINEMODEL_4PARAM ) + if ( ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine4ParaAvail ) && affineType == AFFINEMODEL_4PARAM ) { for ( int i = 0; i < 2; i++ ) { @@ -747,7 +769,7 @@ Distortion InterSearch::xGetInterPredictionError( PredictionUnit& pu, PelUnitBuf DistParam cDistParam; cDistParam.applyWeight = false; - m_pcRdCost->setDistParam( cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass ); + m_pcRdCost->setDistParam(cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->slice->getDisableSATDForRD()); return (Distortion)cDistParam.distFunc( cDistParam ); } @@ -799,12 +821,17 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu, int refStride, orgStride; int width, height; - int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); + int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); + int picHeight = 
pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); UnitArea allCompBlocks(pu.chromaFormat, (Area)pu.block(COMPONENT_Y)); for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++) { + if (sadBestCand[cand] == std::numeric_limits<Distortion>::max()) + { + continue; + } + if ((!cMVCand[cand].getHor()) && (!cMVCand[cand].getVer())) continue; @@ -869,10 +896,14 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu, return bestCandIdx; } -static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, Mv *src, unsigned int sn) +static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, unsigned int dstTotalLength, Mv *src, unsigned int sn) { - for (unsigned int cand = 0; cand < sn && dn<IBC_NUM_CANDIDATES; cand++) + for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++) { + if (src[cand] == Mv()) + { + continue; + } bool found = false; for (int j = 0; j<dn; j++) { @@ -931,9 +962,8 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS m_cDistParam.useMR = false; m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode); - - const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); + const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); + const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); { @@ -942,20 +972,13 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS Distortion tempSadBest = 0; int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom; + m_numBVs = 0; + m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt); - if (roiWidth>8 || roiHeight>8) - { - m_numBVs = 0; - } - else if (roiWidth + roiHeight == 16) - { - m_numBVs = 
m_numBV16s; - } - - Mv cMvPredEncOnly[16]; + Mv cMvPredEncOnly[IBC_NUM_CANDIDATES]; int nbPreds = 0; PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); - m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMvPredEncOnly, nbPreds); + m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds); for (unsigned int cand = 0; cand < m_numBVs; cand++) { @@ -966,7 +989,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS && !((yPred < srTop) || (yPred > srBottom)) && !((xPred < srLeft) || (xPred > srRight))) { - bool validCand = PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, xPred, yPred, lcuWidth); + bool validCand = searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth); if (validCand) { @@ -987,7 +1010,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS const int boundY = (0 - roiHeight - puPelOffsetY); for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y) { - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, 0, y, lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, y, lcuWidth)) { continue; } @@ -1012,7 +1035,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS const int boundX = std::max(srchRngHorLeft, -cuPelX); for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x) { - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, 0, lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, 0, lcuWidth)) { continue; } @@ -1063,7 +1086,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, 
x, y, lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth)) { continue; } @@ -1103,7 +1126,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth)) { continue; } @@ -1160,7 +1183,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth)) continue; - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth)) { continue; } @@ -1196,14 +1219,20 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct& cS ruiCost = sadBest; end: - if (roiWidth + roiHeight > 8) - { - m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMVCand, CHROMA_REFINEMENT_CANDIDATES); + m_numBVs = 0; + m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt); - if (roiWidth + roiHeight == 32) + m_defaultCachedBvs.currCnt = 0; + m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES); + m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs); + + for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++) + { + if (cMVCand[cand].getHor() == 0 && cMVCand[cand].getVer() == 0) { - m_numBV16s = m_numBVs; + continue; } + m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand]; } return; @@ -1218,38 
+1247,79 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY ) { + const int iPicWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); + const int iPicHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); + const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); + const int cuPelX = pu.Y().x; + const int cuPelY = pu.Y().y; + int iRoiWidth = pu.lwidth(); + int iRoiHeight = pu.lheight(); + + PelUnitBuf* pBuf = &origBuf; + + // Search key pattern initialization + CPelBuf tmpPattern = pBuf->Y(); + CPelBuf* pcPatternKey = &tmpPattern; + PelBuf tmpOrgLuma; + + if ((pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) + { + const CompArea &area = pu.blocks[COMPONENT_Y]; + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea); + tmpOrgLuma.copyFrom(tmpPattern); + tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT()); + pcPatternKey = (CPelBuf*)&tmpOrgLuma; + } + + m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y); + Picture* refPic = pu.cu->slice->getPic(); + const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]); + + IntTZSearchStruct cStruct; + cStruct.pcPatternKey = pcPatternKey; + cStruct.iRefStride = refBuf.stride; + cStruct.piRefY = refBuf.buf; + CHECK(pu.cu->imv == IMV_HPEL, "IF_IBC"); + cStruct.imvShift = pu.cu->imv << 1; + cStruct.subShiftMode = 0; // used by intra pattern search function + + // disable weighted prediction + setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice); + + m_pcRdCost->getMotionCost(0); + m_pcRdCost->setPredictors(pcMvPred); + m_pcRdCost->setCostScale(0); + + m_cDistParam.useMR = false; + m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode); bool buffered = false; if (m_pcEncCfg->getIBCFastMethod() & 
IBC_FAST_METHOD_BUFFERBV) { ruiCost = MAX_UINT; - const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); - const int cuPelX = pu.Y().x; - const int cuPelY = pu.Y().y; - - int iRoiWidth = pu.lwidth(); - int iRoiHeight = pu.lheight(); std::unordered_map<Mv, Distortion>& history = m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord; - const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++) { const Mv& bv = p->first; int xBv = bv.hor; int yBv = bv.ver; - if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xBv, yBv, lcuWidth)) - { - if (p->second < ruiCost) + if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth)) + { + buffered = true; + Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xBv, yBv, pu.cs->sps->getAMVREnabledFlag()); + m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv; + sad += m_cDistParam.distFunc(m_cDistParam); + if (sad < ruiCost) { rcMv = bv; - ruiCost = p->second; - buffered = true; + ruiCost = sad; } - else if (p->second == ruiCost) + else if (sad == ruiCost) { // stabilise the search through the unordered list if (bv.hor < rcMv.getHor() - || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer())) + || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer())) { // update the vector. 
rcMv = bv; @@ -1257,56 +1327,54 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, } } } - } - - if (!buffered) - { - Mv cMvSrchRngLT; - Mv cMvSrchRngRB; - //cMvSrchRngLT.highPrec = false; - //cMvSrchRngRB.highPrec = false; + if (buffered) + { + Mv cMvPredEncOnly[IBC_NUM_CANDIDATES]; + int nbPreds = 0; + PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds); - PelUnitBuf* pBuf = &origBuf; + for (unsigned int cand = 0; cand < nbPreds; cand++) + { + int xPred = cMvPredEncOnly[cand].getHor(); + int yPred = cMvPredEncOnly[cand].getVer(); - // Search key pattern initialization - CPelBuf tmpPattern = pBuf->Y(); - CPelBuf* pcPatternKey = &tmpPattern; + if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth)) + { + Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag()); + m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred; + sad += m_cDistParam.distFunc(m_cDistParam); + if (sad < ruiCost) + { + rcMv.set(xPred, yPred); + ruiCost = sad; + } + else if (sad == ruiCost) + { + // stabilise the search through the unordered list + if (xPred < rcMv.getHor() + || (xPred == rcMv.getHor() && yPred < rcMv.getVer())) + { + // update the vector. 
+ rcMv.set(xPred, yPred); + } + } - if ((pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())) - { - const CompArea &area = pu.blocks[COMPONENT_Y]; - CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); - PelBuf tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea); - tmpOrgLuma.copyFrom(tmpPattern); - tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT()); - pcPatternKey = (CPelBuf*)&tmpOrgLuma; + m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad; + } + } } + } - m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y); - Picture* refPic = pu.cu->slice->getPic(); - - const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]); - - IntTZSearchStruct cStruct; - cStruct.pcPatternKey = pcPatternKey; - cStruct.iRefStride = refBuf.stride; - cStruct.piRefY = refBuf.buf; - cStruct.imvShift = pu.cu->imv << 1; - cStruct.subShiftMode = 0; // used by intra pattern search function + if (!buffered) + { + Mv cMvSrchRngLT; + Mv cMvSrchRngRB; - // assume that intra BV is integer-pel precision + // assume that intra BV is integer-pel precision xSetIntraSearchRange(pu, pu.lwidth(), pu.lheight(), localSearchRangeX, localSearchRangeY, cMvSrchRngLT, cMvSrchRngRB); - // disable weighted prediction - setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice); - - m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass); - m_pcRdCost->setPredictors(pcMvPred); - m_pcRdCost->setCostScale(0); - // Do integer search - xIntraPatternSearch(pu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred); } } @@ -1321,14 +1389,15 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; - const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); + const int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); + const int ctuSizeLog2 = floorLog2(lcuWidth); + int 
numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0); - srLeft = -std::min(cuPelX, localSearchRangeX); - srTop = -std::min(cuPelY, localSearchRangeY); + srLeft = -(numLeftCTUs * lcuWidth + (cuPelX % lcuWidth)); + srTop = -(cuPelY % lcuWidth); - srRight = std::min(iPicWidth - cuPelX - iRoiWidth, localSearchRangeX); - srBottom = std::min(iPicHeight - cuPelY - iRoiHeight, localSearchRangeY); + srRight = lcuWidth - (cuPelX % lcuWidth) - iRoiWidth; + srBottom = lcuWidth - (cuPelY % lcuWidth) - iRoiHeight; rcMvSrchRngLT.setHor(srLeft); rcMvSrchRngLT.setVer(srTop); @@ -1339,10 +1408,14 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR rcMvSrchRngRB <<= 2; xClipMv(rcMvSrchRngLT, pu.cu->lumaPos(), pu.cu->lumaSize(), - sps); + sps + , *pu.cs->pps + ); xClipMv(rcMvSrchRngRB, pu.cu->lumaPos(), pu.cu->lumaSize(), - sps); + sps + , *pu.cs->pps + ); rcMvSrchRngLT >>= 2; rcMvSrchRngRB >>= 2; } @@ -1370,14 +1443,23 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const Mv cMv, cMvPred[2]; AMVPInfo amvpInfo; PU::fillIBCMvpCand(pu, amvpInfo); - cMvPred[0].set(amvpInfo.mvCand[0].getHor() >> (2), amvpInfo.mvCand[0].getVer() >> (2)); // store in full pel accuracy, shift before use in search - cMvPred[1].set(amvpInfo.mvCand[1].getHor() >> (2), amvpInfo.mvCand[1].getVer() >> (2)); + // store in full pel accuracy, shift before use in search + cMvPred[0] = amvpInfo.mvCand[0]; + cMvPred[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + cMvPred[1] = amvpInfo.mvCand[1]; + cMvPred[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); int iBvpNum = 2; int bvpIdxBest = 0; cMv.setZero(); Distortion cost = 0; + if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 ) + { + iBvpNum = 1; + cMvPred[1] = cMvPred[0]; + } + if (m_pcEncCfg->getIBCHashSearch()) { xxIBCHashSearch(pu, cMvPred, iBvpNum, cMv, bvpIdxBest, ibcHashMap); @@ -1426,14 +1508,14 @@ bool 
InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const { mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];// cMvPred[bvpIdxTemp]; - mvPredQuadPel >>= (4); + mvPredQuadPel.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_4PEL); m_pcRdCost->setPredictor(mvPredQuadPel); bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor() >> 2, cMv.getVer() >> 2, 0); } - mvPredQuadPel <<= (2); + mvPredQuadPel.changePrecision(MV_PRECISION_4PEL, MV_PRECISION_INT); if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel) { bitsBVPBest = bitsBVPQP; @@ -1445,8 +1527,8 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const } - pu.bv = cMv; - cMv <<= (2); + pu.bv = cMv; // bv is always at integer accuracy + cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); pu.mv[REF_PIC_LIST_0] = cMv; // store in fractional pel accuracy pu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest; @@ -1463,13 +1545,8 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const if (cu.cs->sps->getAMVREnabledFlag()) assert(pu.cu->imv>0 || pu.mvd[REF_PIC_LIST_0] == Mv()); - if (!cu.cs->sps->getAMVREnabledFlag()) - pu.mvd[REF_PIC_LIST_0] >>= (2); - pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; - pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[pu.bv] = cost; } return true; @@ -1488,21 +1565,21 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth(); const int cuPelX = pu.Y().x; const int cuPelY = pu.Y().y; - const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples(); - const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples(); + const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples(); + const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples(); int roiWidth = pu.lwidth(); int roiHeight = pu.lheight(); for 
(std::vector<Position>::iterator pos = candPos.begin(); pos != candPos.end(); pos++) { Position bottomRight = pos->offset(pu.Y().width - 1, pu.Y().height - 1); - if (pu.cs->isDecomp(*pos, pu.cs->chType) && pu.cs->isDecomp(bottomRight, pu.cs->chType)) + if (pu.cs->isDecomp(*pos, CHANNEL_TYPE_LUMA) && pu.cs->isDecomp(bottomRight, CHANNEL_TYPE_LUMA)) { Position tmp = *pos - pu.Y().pos(); Mv candMv; candMv.set(tmp.x, tmp.y); - if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, candMv.getHor(), candMv.getVer(), lcuWidth)) + if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, candMv.getHor(), candMv.getVer(), lcuWidth)) { continue; } @@ -1527,7 +1604,9 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred, int imvShift = 2; int offset = 1 << (imvShift - 1); - mvPredQuadPel.set(((mvPred[n].hor + offset) >> 2), ((mvPred[n].ver + offset) >> 2)); + int x = (mvPred[n].hor + offset - (mvPred[n].hor >= 0)) >> 2; + int y = (mvPred[n].ver + offset - (mvPred[n].ver >= 0)) >> 2; + mvPredQuadPel.set(x, y); m_pcRdCost->setPredictor(mvPredQuadPel); @@ -1602,83 +1681,152 @@ void InterSearch::selectMatchesInter(const MapIterator& itBegin, int count, std: } } } - -int InterSearch::xHashInterPredME(const PredictionUnit& pu, RefPicList currRefPicList, int currRefPicIndex, Mv bestMv[5]) +void InterSearch::selectRectangleMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& listBlockHash, const BlockHash& currBlockHash, int width, int height, int idxNonSimple, unsigned int* &hashValues, int baseNum, int picWidth, int picHeight, bool isHorizontal, uint16_t* curHashPic) { - int width = pu.cu->lumaSize().width; - int height = pu.cu->lumaSize().height; - int xPos = pu.cu->lumaPos().x; - int yPos = pu.cu->lumaPos().y; + const int maxReturnNumber = 5; + int baseSize = min(width, height); + unsigned int crcMask = 1 << 16; + crcMask -= 1; - uint32_t hashValue1; - uint32_t hashValue2; 
+ listBlockHash.clear(); + std::list<int> listCost; + listCost.clear(); - if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2)) - { - return 0; - } - BlockHash currBlockHash; - currBlockHash.x = xPos; - currBlockHash.y = yPos; - currBlockHash.hashValue2 = hashValue2; + MapIterator it = itBegin; - int count = static_cast<int>(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->count(hashValue1)); - if (count == 0) + for (int i = 0; i < count; i++, it++) { - return 0; - } + if ((*it).hashValue2 != currBlockHash.hashValue2) + { + continue; + } + int xRef = (*it).x; + int yRef = (*it).y; + if (isHorizontal) + { + xRef -= idxNonSimple * baseSize; + } + else + { + yRef -= idxNonSimple * baseSize; + } + if (xRef < 0 || yRef < 0 || xRef + width >= picWidth || yRef + height >= picHeight) + { + continue; + } + //check Other baseSize hash values + uint16_t* refHashValue = curHashPic + yRef * picWidth + xRef; + bool isSame = true; - list<BlockHash> listBlockHash; - selectMatchesInter(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash); + for (int k = 0; k < baseNum; k++) + { + if ((*refHashValue) != (uint16_t)(hashValues[k] & crcMask)) + { + isSame = false; + break; + } + refHashValue += (isHorizontal ? 
baseSize : (baseSize*picWidth)); + } + if (!isSame) + { + continue; + } - if (listBlockHash.empty()) - { - return 0; - } + int currCost = RdCost::xGetExpGolombNumberOfBits(xRef - currBlockHash.x) + + RdCost::xGetExpGolombNumberOfBits(yRef - currBlockHash.y); - int totalSize = 0; - list<BlockHash>::iterator it = listBlockHash.begin(); - for (int i = 0; i < 5 && i < listBlockHash.size(); i++, it++) - { - bestMv[i].set((*it).x - currBlockHash.x, (*it).y - currBlockHash.y); - totalSize++; - } + BlockHash refBlockHash; + refBlockHash.hashValue2 = (*it).hashValue2; + refBlockHash.x = xRef; + refBlockHash.y = yRef; - return totalSize; + if (listBlockHash.size() < maxReturnNumber) + { + addToSortList(listBlockHash, listCost, currCost, refBlockHash); + } + else if (!listCost.empty() && currCost < listCost.back()) + { + listCost.pop_back(); + listBlockHash.pop_back(); + addToSortList(listBlockHash, listCost, currCost, refBlockHash); + } + } } -bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch) +bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch) { int width = pu.cu->lumaSize().width; int height = pu.cu->lumaSize().height; + + int baseSize = min(width, height); + bool isHorizontal = true;; + int baseNum = 0; + if (height < width) + { + isHorizontal = true; + baseNum = 1 << (floorLog2(width) - floorLog2(height)); + } + else + { + isHorizontal = false; + baseNum = 1 << (floorLog2(height) - floorLog2(width)); + } + int xPos = pu.cu->lumaPos().x; int yPos = pu.cu->lumaPos().y; + const int currStride = pu.cs->picture->getOrigBuf().get(COMPONENT_Y).stride; + const Pel* curPel = pu.cs->picture->getOrigBuf().get(COMPONENT_Y).buf + yPos * currStride + xPos; + int picWidth = pu.cu->slice->getPPS()->getPicWidthInLumaSamples(); + int picHeight = 
pu.cu->slice->getPPS()->getPicHeightInLumaSamples(); - uint32_t hashValue1; - uint32_t hashValue2; - Distortion bestCost = UINT64_MAX; + int xBase = xPos; + int yBase = yPos; + const Pel* basePel = curPel; + int idxNonSimple = -1; + unsigned int* hashValue1s = new unsigned int[baseNum]; + unsigned int* hashValue2s = new unsigned int[baseNum]; - if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2)) + for (int k = 0; k < baseNum; k++) { - return false; + if (isHorizontal) + { + xBase = xPos + k * baseSize; + basePel = curPel + k * baseSize; + } + else + { + yBase = yPos + k * baseSize; + basePel = curPel + k * baseSize * currStride; + } + + if (idxNonSimple == -1 && !TComHash::isHorizontalPerfectLuma(basePel, currStride, baseSize, baseSize) && !TComHash::isVerticalPerfectLuma(basePel, currStride, baseSize, baseSize)) + { + idxNonSimple = k; + } + TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), baseSize, baseSize, xBase, yBase, pu.cu->slice->getSPS()->getBitDepths(), hashValue1s[k], hashValue2s[k]); + } + if (idxNonSimple == -1) + { + idxNonSimple = 0; } + Distortion bestCost = UINT64_MAX; + BlockHash currBlockHash; - currBlockHash.x = xPos; + currBlockHash.x = xPos;//still use the first base block location currBlockHash.y = yPos; - currBlockHash.hashValue2 = hashValue2; + + currBlockHash.hashValue2 = hashValue2s[idxNonSimple]; m_pcRdCost->setDistParam(m_cDistParam, pu.cs->getOrgBuf(pu).Y(), 0, 0, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, false); int imvBest = 0; - int numPredDir = pu.cu->slice->isInterP() ? 1 : 2; for (int refList = 0; refList < numPredDir; refList++) { RefPicList eRefPicList = (refList == 0) ? 
REF_PIC_LIST_0 : REF_PIC_LIST_1; int refPicNumber = pu.cu->slice->getNumRefIdx(eRefPicList); - for (int refIdx = 0; refIdx < refPicNumber; refIdx++) { int bitsOnRefIdx = 1; @@ -1690,47 +1838,63 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi bitsOnRefIdx--; } } + m_numHashMVStoreds[eRefPicList][refIdx] = 0; + + const std::pair<int, int>& scaleRatio = pu.cu->slice->getScalingRatio( eRefPicList, refIdx ); + if( scaleRatio != SCALE_1X ) + { + continue; + } + + CHECK( pu.cu->slice->getRefPic( eRefPicList, refIdx )->getHashMap() == nullptr, "Hash table is not initialized" ); if (refList == 0 || pu.cu->slice->getList1IdxToList0Idx(refIdx) < 0) { - int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1)); + int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1s[idxNonSimple])); if (count == 0) { continue; } list<BlockHash> listBlockHash; - selectMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash); + selectRectangleMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1s[idxNonSimple]), count, listBlockHash, currBlockHash, width, height, idxNonSimple, hashValue2s, baseNum, picWidth, picHeight, isHorizontal, pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getHashPic(baseSize)); + m_numHashMVStoreds[eRefPicList][refIdx] = int(listBlockHash.size()); if (listBlockHash.empty()) { continue; } AMVPInfo currAMVPInfoPel; AMVPInfo currAMVPInfo4Pel; + AMVPInfo currAMVPInfoQPel; pu.cu->imv = 2; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel); pu.cu->imv = 1; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel); - AMVPInfo currAMVPInfoQPel; pu.cu->imv = 0; PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel); - CHECK(currAMVPInfoPel.numCand <= 1, "Wrong") - - const Pel* refBufStart = 
pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).buf; - const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).stride; + for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++) + { + currAMVPInfoQPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + currAMVPInfoPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + currAMVPInfo4Pel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + } + const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).buf; + const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).stride; m_cDistParam.cur.stride = refStride; - m_pcRdCost->selectMotionLambda(pu.cu->transQuantBypass); + m_pcRdCost->selectMotionLambda( ); m_pcRdCost->setCostScale(0); list<BlockHash>::iterator it; + int countMV = 0; for (it = listBlockHash.begin(); it != listBlockHash.end(); ++it) { int curMVPIdx = 0; unsigned int curMVPbits = MAX_UINT; Mv cMv((*it).x - currBlockHash.x, (*it).y - currBlockHash.y); + m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv; cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER); for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++) @@ -1775,7 +1939,6 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi } } } - curMVPbits += bitsOnRefIdx; m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x; @@ -1815,6 +1978,8 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi } } } + delete[] hashValue1s; + delete[] hashValue2s; pu.cu->imv = imvBest; if (bestMvd == Mv(0, 0)) { @@ -1824,74 +1989,272 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi return (bestCost < MAX_INT); } -bool 
InterSearch::predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch) +bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch) { - Mv bestMv, bestMvd; - RefPicList bestRefPicList; - int bestRefIndex; - int bestMVPIndex; - - auto &pu = *cu.firstPU; - - Mv cMvZero; - pu.mv[REF_PIC_LIST_0] = Mv(); - pu.mv[REF_PIC_LIST_1] = Mv(); - pu.mvd[REF_PIC_LIST_0] = cMvZero; - pu.mvd[REF_PIC_LIST_1] = cMvZero; - pu.refIdx[REF_PIC_LIST_0] = NOT_VALID; - pu.refIdx[REF_PIC_LIST_1] = NOT_VALID; - pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; - pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; - pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; - pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; - - if (xHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch)) + int width = pu.cu->lumaSize().width; + int height = pu.cu->lumaSize().height; + if (width != height) { - pu.interDir = static_cast<int>(bestRefPicList) + 1; - pu.mv[bestRefPicList] = bestMv; - pu.mv[bestRefPicList].hor <<= MV_FRACTIONAL_BITS_DIFF; - pu.mv[bestRefPicList].ver <<= MV_FRACTIONAL_BITS_DIFF; - - pu.mvd[bestRefPicList] = bestMvd; - pu.refIdx[bestRefPicList] = bestRefIndex; - pu.mvpIdx[bestRefPicList] = bestMVPIndex; + return xRectHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch); + } + int xPos = pu.cu->lumaPos().x; + int yPos = pu.cu->lumaPos().y; - pu.mvpNum[bestRefPicList] = 2; + uint32_t hashValue1; + uint32_t hashValue2; + Distortion bestCost = UINT64_MAX; - PU::spanMotionInfo(pu); - PelUnitBuf predBuf = pu.cs->getPredBuf(pu); - motionCompensation(pu, predBuf, REF_PIC_LIST_X); - return true; - } - else + if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2)) { return false; } - return true; -} - - -//! 
search of the best candidate for inter prediction -void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) -{ - CodingStructure& cs = *cu.cs; + BlockHash currBlockHash; + currBlockHash.x = xPos; + currBlockHash.y = yPos; + currBlockHash.hashValue2 = hashValue2; - AMVPInfo amvp[2]; - Mv cMvSrchRngLT; - Mv cMvSrchRngRB; + m_pcRdCost->setDistParam(m_cDistParam, pu.cs->getOrgBuf(pu).Y(), 0, 0, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, false); - Mv cMvZero; + int imvBest = 0; - Mv cMv[2]; - Mv cMvBi[2]; - Mv cMvTemp[2][33]; - Mv cMvHevcTemp[2][33]; - int iNumPredDir = cs.slice->isInterP() ? 1 : 2; + int numPredDir = pu.cu->slice->isInterP() ? 1 : 2; + for (int refList = 0; refList < numPredDir; refList++) + { + RefPicList eRefPicList = (refList == 0) ? REF_PIC_LIST_0 : REF_PIC_LIST_1; + int refPicNumber = pu.cu->slice->getNumRefIdx(eRefPicList); - Mv cMvPred[2][33]; - Mv cMvPredBi[2][33]; + for (int refIdx = 0; refIdx < refPicNumber; refIdx++) + { + int bitsOnRefIdx = 1; + if (refPicNumber > 1) + { + bitsOnRefIdx += refIdx + 1; + if (refIdx == refPicNumber - 1) + { + bitsOnRefIdx--; + } + } + m_numHashMVStoreds[eRefPicList][refIdx] = 0; + + const std::pair<int, int>& scaleRatio = pu.cu->slice->getScalingRatio( eRefPicList, refIdx ); + if( scaleRatio != SCALE_1X ) + { + continue; + } + + CHECK( pu.cu->slice->getRefPic( eRefPicList, refIdx )->getHashMap() == nullptr, "Hash table is not initialized" ); + + if (refList == 0 || pu.cu->slice->getList1IdxToList0Idx(refIdx) < 0) + { + int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1)); + if (count == 0) + { + continue; + } + + list<BlockHash> listBlockHash; + selectMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash); + m_numHashMVStoreds[eRefPicList][refIdx] = (int)listBlockHash.size(); + if (listBlockHash.empty()) + { + continue; + } + AMVPInfo currAMVPInfoPel; + 
AMVPInfo currAMVPInfo4Pel; + pu.cu->imv = 2; + PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel); + pu.cu->imv = 1; + PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel); + AMVPInfo currAMVPInfoQPel; + pu.cu->imv = 0; + PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel); + CHECK(currAMVPInfoPel.numCand <= 1, "Wrong") + for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++) + { + currAMVPInfoQPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + currAMVPInfoPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + currAMVPInfo4Pel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + } + + const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).buf; + const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).stride; + + m_cDistParam.cur.stride = refStride; + + m_pcRdCost->selectMotionLambda( ); + m_pcRdCost->setCostScale(0); + + list<BlockHash>::iterator it; + int countMV = 0; + for (it = listBlockHash.begin(); it != listBlockHash.end(); ++it) + { + int curMVPIdx = 0; + unsigned int curMVPbits = MAX_UINT; + Mv cMv((*it).x - currBlockHash.x, (*it).y - currBlockHash.y); + m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv; + cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER); + + for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++) + { + Mv cMvPredPel = currAMVPInfoQPel.mvCand[mvpIdxTemp]; + m_pcRdCost->setPredictor(cMvPredPel); + + unsigned int tempMVPbits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0); + + if (tempMVPbits < curMVPbits) + { + curMVPbits = tempMVPbits; + curMVPIdx = mvpIdxTemp; + pu.cu->imv = 0; + } + + if (pu.cu->slice->getSPS()->getAMVREnabledFlag()) + { + unsigned int bitsMVP1Pel = MAX_UINT; + Mv mvPred1Pel = 
currAMVPInfoPel.mvCand[mvpIdxTemp]; + m_pcRdCost->setPredictor(mvPred1Pel); + bitsMVP1Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 2); + if (bitsMVP1Pel < curMVPbits) + { + curMVPbits = bitsMVP1Pel; + curMVPIdx = mvpIdxTemp; + pu.cu->imv = 1; + } + + if ((cMv.getHor() % 16 == 0) && (cMv.getVer() % 16 == 0)) + { + unsigned int bitsMVP4Pel = MAX_UINT; + Mv mvPred4Pel = currAMVPInfo4Pel.mvCand[mvpIdxTemp]; + m_pcRdCost->setPredictor(mvPred4Pel); + bitsMVP4Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 4); + if (bitsMVP4Pel < curMVPbits) + { + curMVPbits = bitsMVP4Pel; + curMVPIdx = mvpIdxTemp; + pu.cu->imv = 2; + } + } + } + } + + curMVPbits += bitsOnRefIdx; + + m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x; + Distortion currSad = m_cDistParam.distFunc(m_cDistParam); + Distortion currCost = currSad + m_pcRdCost->getCost(curMVPbits); + + if (!isPerfectMatch) + { + if (pu.cu->slice->getRefPic(eRefPicList, refIdx)->slices[0]->getSliceQp() <= pu.cu->slice->getSliceQp()) + { + isPerfectMatch = true; + } + } + + if (currCost < bestCost) + { + bestCost = currCost; + bestRefPicList = eRefPicList; + bestRefIndex = refIdx; + bestMv = cMv; + bestMVPIndex = curMVPIdx; + imvBest = pu.cu->imv; + if (pu.cu->imv == 2) + { + bestMvd = cMv - currAMVPInfo4Pel.mvCand[curMVPIdx]; + } + else if (pu.cu->imv == 1) + { + bestMvd = cMv - currAMVPInfoPel.mvCand[curMVPIdx]; + } + else + { + bestMvd = cMv - currAMVPInfoQPel.mvCand[curMVPIdx]; + } + } + } + } + } + } + pu.cu->imv = imvBest; + if (bestMvd == Mv(0, 0)) + { + pu.cu->imv = 0; + return false; + } + return (bestCost < MAX_INT); +} + +bool InterSearch::predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch) +{ + Mv bestMv, bestMvd; + RefPicList bestRefPicList; + int bestRefIndex; + int bestMVPIndex; + + auto &pu = *cu.firstPU; + + Mv cMvZero; + pu.mv[REF_PIC_LIST_0] = Mv(); + pu.mv[REF_PIC_LIST_1] = Mv(); + pu.mvd[REF_PIC_LIST_0] = 
cMvZero; + pu.mvd[REF_PIC_LIST_1] = cMvZero; + pu.refIdx[REF_PIC_LIST_0] = NOT_VALID; + pu.refIdx[REF_PIC_LIST_1] = NOT_VALID; + pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID; + pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID; + pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID; + pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID; + + if (xHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch)) + { + pu.interDir = static_cast<int>(bestRefPicList) + 1; + pu.mv[bestRefPicList] = bestMv; + pu.mv[bestRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + + pu.mvd[bestRefPicList] = bestMvd; + pu.mvd[bestRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); + pu.refIdx[bestRefPicList] = bestRefIndex; + pu.mvpIdx[bestRefPicList] = bestMVPIndex; + + pu.mvpNum[bestRefPicList] = 2; + + PU::spanMotionInfo(pu); + PelUnitBuf predBuf = pu.cs->getPredBuf(pu); + motionCompensation(pu, predBuf, REF_PIC_LIST_X); + return true; + } + else + { + return false; + } + + return true; +} + + +//! search of the best candidate for inter prediction +void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) +{ + CodingStructure& cs = *cu.cs; + + AMVPInfo amvp[2]; + Mv cMvSrchRngLT; + Mv cMvSrchRngRB; + + Mv cMvZero; + + Mv cMv[2]; + Mv cMvBi[2]; + Mv cMvTemp[2][33]; + Mv cMvHevcTemp[2][33]; + int iNumPredDir = cs.slice->isInterP() ? 1 : 2; + + Mv cMvPred[2][33]; + + Mv cMvPredBi[2][33]; int aaiMvpIdxBi[2][33]; int aaiMvpIdx[2][33]; @@ -1914,8 +2277,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); - uint8_t gbiIdx = (cu.cs->slice->isInterB() ? cu.GBiIdx : GBI_DEFAULT); - bool enforceGBiPred = false; + uint8_t bcwIdx = (cu.cs->slice->isInterB() ? 
cu.BcwIdx : BCW_DEFAULT); + bool enforceBcwPred = false; MergeCtx mergeCtx; // Loop over Prediction Units @@ -1925,9 +2288,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) WPScalingParam *wp0; WPScalingParam *wp1; int tryBipred = 0; - bool checkAffine = pu.cu->imv == 0 || pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag(); - bool checkNonAffine = pu.cu->imv == 0 || ( pu.cu->slice->getSPS()->getAMVREnabledFlag() && - pu.cu->imv <= (pu.cu->slice->getSPS()->getAMVREnabledFlag() ? IMV_4PEL : 0)); + bool checkAffine = (pu.cu->imv == 0 || pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag()) && pu.cu->imv != IMV_HPEL; + bool checkNonAffine = pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL || (pu.cu->slice->getSPS()->getAMVREnabledFlag() && + pu.cu->imv <= (pu.cu->slice->getSPS()->getAMVREnabledFlag() ? IMV_4PEL : 0)); CodingUnit *bestCU = pu.cu->cs->bestCS != nullptr ? pu.cu->cs->bestCS->getCU( CHANNEL_TYPE_LUMA ) : nullptr; bool trySmvd = ( bestCU != nullptr && pu.cu->imv == 2 && checkAffine ) ? ( !bestCU->firstPU->mergeFlag && !bestCU->affine ) : true; if ( pu.cu->imv && bestCU != nullptr && checkAffine ) @@ -1941,6 +2304,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } { + if (pu.cu->cs->bestParent != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA) != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA)->affine == false) + { + m_skipPROF = true; + } + m_encOnly = true; // motion estimation only evaluates luma component m_maxCompIDToPred = MAX_NUM_COMPONENT; // m_maxCompIDToPred = COMPONENT_Y; @@ -1980,9 +2348,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits ); - m_pcRdCost->selectMotionLambda( cu.transQuantBypass ); + m_pcRdCost->selectMotionLambda( ); - unsigned imvShift = pu.cu->imv << 1; + unsigned imvShift = pu.cu->imv == IMV_HPEL ? 
1 : (pu.cu->imv << 1); if ( checkNonAffine ) { // Uni-directional prediction @@ -2005,7 +2373,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList]; aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList]; - if(cs.slice->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist) + if(cs.picHeader->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist) { bestBiPDist = biPDistTemp; bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp]; @@ -2024,7 +2392,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )] ); /*correct the bit-rate part of the current ref*/ m_pcRdCost->setPredictor ( cMvPred[iRefList][iRefIdxTemp] ); - uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer(), imvShift ); + uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer(), imvShift + MV_FRACTIONAL_BITS_DIFF ); /*calculate the correct cost*/ uiCostTemp += m_pcRdCost->getCost( uiBitsTemp ); } @@ -2037,7 +2405,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] ); } - if( cu.cs->sps->getUseGBi() && cu.GBiIdx == GBI_DEFAULT && cu.cs->slice->isInterB() ) + if( cu.cs->sps->getUseBcw() && cu.BcwIdx == BCW_DEFAULT && cu.cs->slice->isInterB() ) { const bool checkIdentical = true; m_uniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp); @@ -2073,20 +2441,23 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } } - if (cu.Y().width > 8 && cu.Y().height > 8 && 
cu.slice->getSPS()->getUseAffine() - && checkAffine - && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) - ) + ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp)); + if (cu.imv == 0 && (!cu.slice->getSPS()->getUseBcw() || bcwIdx == BCW_DEFAULT)) { - ::memcpy( cMvHevcTemp, cMvTemp, sizeof( cMvTemp ) ); + insertUniMvCands(pu.Y(), cMvTemp); + + unsigned idx1, idx2, idx3, idx4; + getAreaIdx(cu.Y(), *cu.slice->getPPS()->pcv, idx1, idx2, idx3, idx4); + ::memcpy(&(g_reusedUniMVs[idx1][idx2][idx3][idx4][0][0]), cMvTemp, 2 * 33 * sizeof(Mv)); + g_isReusedUniMVsFilled[idx1][idx2][idx3][idx4] = true; } // Bi-predictive Motion estimation if( ( cs.slice->isInterB() ) && ( PU::isBipredRestriction( pu ) == false ) - && (cu.slice->getCheckLDC() || gbiIdx == GBI_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) + && (cu.slice->getCheckLDC() || bcwIdx == BCW_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseBcwFast()) ) { bool doBiPred = true; - tryBipred = 1; + tryBipred = 1; cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1]; iRefIdxBi[0] = iRefIdx[0]; @@ -2097,7 +2468,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uint32_t uiMotBits[2]; - if(cs.slice->getMvdL1ZeroFlag()) + if(cs.picHeader->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]); aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1; @@ -2106,23 +2477,22 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) cMvBi [1] = cMvPredBi[1][bestBiPRefIdxL1]; iRefIdxBi[1] = bestBiPRefIdxL1; pu.mv [REF_PIC_LIST_1] = cMvBi[1]; - pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1; - if( m_pcEncCfg->getMCTSEncConstraint() ) + if( m_pcEncCfg->getMCTSEncConstraint() ) + { + Mv restrictedMv = pu.mv[REF_PIC_LIST_1]; + Area curTileAreaRestricted; + curTileAreaRestricted = 
pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ); + MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); + // If sub-pel filter samples are not inside of allowed area + if( restrictedMv != pu.mv[REF_PIC_LIST_1] ) { - Mv restrictedMv = pu.mv[REF_PIC_LIST_1]; - Area curTileAreaRestricted; - curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ); - MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); - // If sub-pel filter samples are not inside of allowed area - if( restrictedMv != pu.mv[REF_PIC_LIST_1] ) - { - uiCostBi = std::numeric_limits<Distortion>::max(); - doBiPred = false; - } + uiCostBi = std::numeric_limits<Distortion>::max(); + doBiPred = false; } + } PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(cu, pu) ); motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 ); @@ -2157,12 +2527,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int iNumIter = 4; // fast encoder setting: only one iteration - if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.slice->getMvdL1ZeroFlag() ) + if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.picHeader->getMvdL1ZeroFlag() ) { iNumIter = 1; } - enforceGBiPred = (gbiIdx != GBI_DEFAULT); + enforceBcwPred = (bcwIdx != BCW_DEFAULT); for ( int iIter = 0; iIter < iNumIter; iIter++ ) { int iRefList = iIter % 2; @@ -2177,19 +2547,18 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { iRefList = 0; } - if( gbiIdx != GBI_DEFAULT ) + if( bcwIdx != BCW_DEFAULT ) { - iRefList = ( abs( getGbiWeight(gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight(gbiIdx, REF_PIC_LIST_1 ) ) ? 
1 : 0 ); + iRefList = ( abs( getBcwWeight(bcwIdx, REF_PIC_LIST_0 ) ) > abs( getBcwWeight(bcwIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 ); } } else if ( iIter == 0 ) { iRefList = 0; } - if ( iIter == 0 && !cs.slice->getMvdL1ZeroFlag()) + if ( iIter == 0 && !cs.picHeader->getMvdL1ZeroFlag()) { pu.mv [1 - iRefList] = cMv [1 - iRefList]; - pu.mv[1 - iRefList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(cu, pu) ); @@ -2198,7 +2567,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); - if(cs.slice->getMvdL1ZeroFlag()) + if(cs.picHeader->getMvdL1ZeroFlag()) { iRefList = 0; eRefPicList = REF_PIC_LIST_0; @@ -2210,14 +2579,14 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) iRefEnd = cs.slice->getNumRefIdx(eRefPicList)-1; for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++) { - if( m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) + if( m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT) && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) && (!pu.cu->imv && pu.cu->slice->getTLayer()>1)) { continue; } uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; - uiBitsTemp += ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0); + uiBitsTemp += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0); if ( cs.slice->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; @@ -2244,14 +2613,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; - uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseGBi() == true) ? 
getWeightIdxBits(gbiIdx) : 0); + uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0); uiBits[2] = uiBitsTemp; if(iNumIter!=1) { // Set motion pu.mv [eRefPicList] = cMvBi [iRefList]; - pu.mv[eRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.refIdx[eRefPicList] = iRefIdxBi[iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(cu, pu) ); @@ -2262,14 +2630,14 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) if ( !bChanged ) { - if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) + if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred) { xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]); - xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv); - if(!cs.slice->getMvdL1ZeroFlag()) + xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, pu.cu->imv); + if(!cs.picHeader->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]); - xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv); + xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, pu.cu->imv); } } break; @@ -2291,6 +2659,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int refIdxCur = cs.slice->getSymRefIdx( curRefList ); int refIdxTar = cs.slice->getSymRefIdx( tarRefList ); + if ( aacAMVPInfo[curRefList][refIdxCur].mvCand[0] == aacAMVPInfo[curRefList][refIdxCur].mvCand[1] ) + aacAMVPInfo[curRefList][refIdxCur].numCand = 1; + if ( aacAMVPInfo[tarRefList][refIdxTar].mvCand[0] == 
aacAMVPInfo[tarRefList][refIdxTar].mvCand[1] ) + aacAMVPInfo[tarRefList][refIdxTar].numCand = 1; + MvField cCurMvField, cTarMvField; Distortion costStart = std::numeric_limits<Distortion>::max(); for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ ) @@ -2299,7 +2672,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur ); cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar ); - Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, gbiIdx ); + Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, bcwIdx ); if ( cost < costStart ) { costStart = cost; @@ -2314,17 +2687,52 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) cTarMvField.mv = cMvPredSym[tarRefList]; m_pcRdCost->setCostScale(0); - m_pcRdCost->setPredictor(cMvPredSym[curRefList]); - uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(cCurMvField.mv.hor, cCurMvField.mv.ver, (pu.cu->imv << 1)); + Mv pred = cMvPredSym[curRefList]; + pred.changeTransPrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor(pred); + Mv mv = cCurMvField.mv; + mv.changeTransPrecInternal2Amvr(pu.cu->imv); + uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0); bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS]; bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]; costStart += m_pcRdCost->getCost(bits); std::vector<Mv> symmvdCands; - symmvdCands.push_back(cMvTemp[curRefList][refIdxCur]); - if (iRefIdxBi[curRefList] == refIdxCur && cMvBi[curRefList] != cMvTemp[curRefList][refIdxCur]) + auto smmvdCandsGen = [&](Mv mvCand, bool mvPrecAdj) { - symmvdCands.push_back(cMvBi[curRefList]); + if (mvPrecAdj && pu.cu->imv) + { + mvCand.roundTransPrecInternal2Amvr(pu.cu->imv); + } + + bool toAddMvCand = true; + for 
(std::vector<Mv>::iterator pos = symmvdCands.begin(); pos != symmvdCands.end(); pos++) + { + if (*pos == mvCand) + { + toAddMvCand = false; + break; + } + } + + if (toAddMvCand) + { + symmvdCands.push_back(mvCand); + } + }; + + smmvdCandsGen(cMvHevcTemp[curRefList][refIdxCur], false); + smmvdCandsGen(cMvTemp[curRefList][refIdxCur], false); + if (iRefIdxBi[curRefList] == refIdxCur) + { + smmvdCandsGen(cMvBi[curRefList], false); + } + for (int i = 0; i < m_uniMvListSize; i++) + { + if ( symmvdCands.size() >= 5 ) + break; + BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true); } for (auto mvStart : symmvdCands) @@ -2335,10 +2743,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]); } if (checked) - break; + { + continue; + } Distortion bestCost = costStart; - symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, costStart); + symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, costStart); if (costStart < bestCost) { cCurMvField.setMvField(mvStart, refIdxCur); @@ -2351,18 +2761,18 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) symCost = costStart - mvpCost; // ME - xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, gbiIdx ); + xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, bcwIdx ); symCost += mvpCost; if (startPtMv != cCurMvField.mv) { // if ME change MV, run a final check for best MVP. 
- symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, symCost, true); + symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, symCost, true); } bits = uiMbBits[2]; bits += 1; // add one bit for #symmetrical MVD mode - bits += ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0); + bits += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0); symCost += m_pcRdCost->getCost(bits); cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar); @@ -2411,20 +2821,20 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) iRefIdx[1] = refIdxValidList1; uiBits [1] = bitsValidList1; uiCost [1] = costValidList1; - if (cu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT)) - { - CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); - CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); - cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); - cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); - if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag - || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)) - { - uiCostBi = MAX_UINT; - enforceGBiPred = false; - } - } - if( enforceGBiPred ) + if (cu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT)) + { + CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); + CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); + cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); + cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); + if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag + || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || 
wp1[COMPONENT_Cr].bPresentFlag)) + { + uiCostBi = MAX_UINT; + enforceBcwPred = false; + } + } + if( enforceBcwPred ) { uiCost[0] = uiCost[1] = MAX_UINT; } @@ -2435,8 +2845,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiLastMode = 2; pu.mv [REF_PIC_LIST_0] = cMvBi[0]; pu.mv [REF_PIC_LIST_1] = cMvBi[1]; - pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]]; pu.mvd [REF_PIC_LIST_1] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]]; pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0]; @@ -2453,7 +2861,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { uiLastMode = 0; pu.mv [REF_PIC_LIST_0] = cMv[0]; - pu.mv [REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_0] = cMv[0] - cMvPred[0][iRefIdx[0]]; pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0]; pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]]; @@ -2464,7 +2871,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { uiLastMode = 1; pu.mv [REF_PIC_LIST_1] = cMv[1]; - pu.mv [REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); pu.mvd [REF_PIC_LIST_1] = cMv[1] - cMvPred[1][iRefIdx[1]]; pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1]; pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]]; @@ -2472,16 +2878,16 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) pu.interDir = 2; } - if( gbiIdx != GBI_DEFAULT ) + if( bcwIdx != BCW_DEFAULT ) { - cu.GBiIdx = GBI_DEFAULT; // Reset to default for the Non-NormalMC modes. + cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes. } uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? 
uiCost[0] : uiCost[1] ); } if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getUseAffine() && checkAffine - && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast()) + && (bcwIdx == BCW_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseBcwFast()) ) { m_hevcCost = uiHevcCost; @@ -2509,12 +2915,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) Mv acMvAffine4Para[2][33][3]; int refIdx4Para[2] = { -1, -1 }; - xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, - ((cu.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0)); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred, + ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0)); if ( pu.cu->imv == 0 ) { - storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, gbiIdx ); + storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, bcwIdx ); } if ( cu.slice->getSPS()->getUseAffineType() ) @@ -2549,12 +2955,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max(); cu.affineType = AFFINEMODEL_6PARAM; - xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred, - ((cu.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0)); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred, + ((cu.slice->getSPS()->getUseBcw() == true) ? 
getWeightIdxBits(bcwIdx) : 0)); if ( pu.cu->imv == 0 ) { - storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, gbiIdx ); + storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, bcwIdx ); } // reset to 4 parameter affine inter mode @@ -2575,12 +2981,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) pu.mvdAffi[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx]; } - PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0 - , false - ); - PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1 - , false - ); + PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0); + PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1); } else { @@ -2603,6 +3005,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) // set hevc me result cu.affine = false; pu.mergeFlag = bMergeFlag; + pu.regularMergeFlag = false; pu.mergeIdx = uiMRGIndex; pu.interDir = uiInterDir; cu.smvdMode = iSymMode; @@ -2627,9 +3030,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag ) { - if (gbiIdx != GBI_DEFAULT) + if (bcwIdx != BCW_DEFAULT) { - cu.GBiIdx = gbiIdx; + cu.BcwIdx = bcwIdx; } } m_maxCompIDToPred = MAX_NUM_COMPONENT; @@ -2638,9 +3041,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) PU::spanMotionInfo( pu, mergeCtx ); } + m_skipPROF = false; + m_encOnly = false; // MC PelUnitBuf predBuf = pu.cs->getPredBuf(pu); - if ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail ) + if ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail ) { m_affineMotion.hevcCost[pu.cu->imv] = uiHevcCost; } @@ -2653,33 +3058,21 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) return; } -uint32_t InterSearch::xCalcAffineMVBits( 
PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3], bool mvHighPrec ) +uint32_t InterSearch::xCalcAffineMVBits( PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3] ) { int mvNum = pu.cu->affineType ? 3 : 2; - Mv tempMv0 = acMvTemp[0]; - const int shift = mvHighPrec ? MV_FRACTIONAL_BITS_DIFF : 0; - const unsigned int mvdShift = pu.cu->imv == 2 ? MV_FRACTIONAL_BITS_DIFF : 0; - Mv secondPred; - - if ( mvHighPrec ) - { - tempMv0.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); - } - m_pcRdCost->setCostScale( 0 ); uint32_t bitsTemp = 0; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) { - m_pcRdCost->setPredictor( acMvPred[verIdx] ); - - if ( verIdx != 0 ) - { - secondPred = acMvPred[verIdx] + ( tempMv0 - acMvPred[0] ); - m_pcRdCost->setPredictor( secondPred ); - } + Mv pred = verIdx == 0 ? acMvPred[verIdx] : acMvPred[verIdx] + acMvTemp[0] - acMvPred[0]; + pred.changeAffinePrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor( pred ); + Mv mv = acMvTemp[verIdx]; + mv.changeAffinePrecInternal2Amvr(pu.cu->imv); - bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[verIdx].getHor() >> shift, acMvTemp[verIdx].getVer() >> shift, mvdShift ); + bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 ); } return bitsTemp; @@ -2774,11 +3167,10 @@ void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst) void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv ) { - if( imv > 0 ) + if ( imv > 0 && imv < 3 ) { return; } - unsigned imvshift = imv << 1; AMVPInfo* pcAMVPInfo = &amvpInfo; @@ -2793,8 +3185,12 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int iBestMVPIdx = riMVPIdx; - m_pcRdCost->setPredictor( rcMvPred ); - int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift); + Mv pred = rcMvPred; + 
pred.changeTransPrecInternal2Amvr(imv); + m_pcRdCost->setPredictor( pred ); + Mv mv = cMv; + mv.changeTransPrecInternal2Amvr(imv); + int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0); iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS]; int iBestMvBits = iOrgMvBits; @@ -2805,8 +3201,10 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, continue; } - m_pcRdCost->setPredictor( pcAMVPInfo->mvCand[iMVPIdx] ); - int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift); + pred = pcAMVPInfo->mvCand[iMVPIdx]; + pred.changeTransPrecInternal2Amvr(imv); + m_pcRdCost->setPredictor( pred ); + int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0); iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; if (iMvBits < iBestMvBits) @@ -2841,12 +3239,7 @@ Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu, Distortion uiCost = std::numeric_limits<Distortion>::max(); const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx ); - cMvCand.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv( cMvCand, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); - - + clipMv( cMvCand, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); // prediction pattern const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE; @@ -2879,12 +3272,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE; Mv mv[3]; memcpy(mv, acMvCand, sizeof(mv)); - if ( pu.cu->imv != 1 ) - { - mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - mv[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - mv[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } + m_iRefListIdx = eRefPicList; xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, 
predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y)); if( bi ) { @@ -2892,9 +3280,9 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& } // calc distortion - + enum DFunc distFunc = (pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD; uiCost = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y - , DF_HAD + , distFunc ); uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] ); DTRACE( g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCost ); @@ -2903,7 +3291,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi) { - if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) ) + if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) ) { return; } @@ -2925,11 +3313,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu )); origBufTmp.copyFrom(origBuf); origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() - ,getGbiWeight( pu.cu->GBiIdx, eRefPicList ) + ,getBcwWeight( pu.cu->BcwIdx, eRefPicList ) ); pBuf = &origBufTmp; - fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList ); + fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eRefPicList ); } m_cDistParam.isBiPred = bBi; @@ -2939,13 +3327,14 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref m_lumaClpRng 
= pu.cs->slice->clpRng( COMPONENT_Y ); - CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y]); + CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y], pu.cs->sps->getWrapAroundEnabledFlag()); IntTZSearchStruct cStruct; cStruct.pcPatternKey = pcPatternKey; cStruct.iRefStride = buf.stride; cStruct.piRefY = buf.buf; - cStruct.imvShift = pu.cu->imv << 1; + cStruct.imvShift = pu.cu->imv == IMV_HPEL ? 1 : (pu.cu->imv << 1); + cStruct.useAltHpelIf = pu.cu->imv == IMV_HPEL; cStruct.inCtuSearch = false; cStruct.zeroMV = false; { @@ -2966,12 +3355,13 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref if( bValid ) { bQTBTMV2 = true; - cIntMv <<= 2; + cIntMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL); } } - - m_pcRdCost->setPredictor( rcMvPred ); + Mv predQuarter = rcMvPred; + predQuarter.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + m_pcRdCost->setPredictor( predQuarter ); m_pcRdCost->setCostScale(2); @@ -2984,13 +3374,54 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref // Do integer search if( ( m_motionEstimationSearchMethod == MESEARCH_FULL ) || bBi || bQTBTMV ) { + cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0; + m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode); + + Mv bestInitMv = (bBi ? 
rcMv : rcMvPred); + Mv cTmpMv = bestInitMv; + clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor; + Distortion uiBestSad = m_cDistParam.distFunc(m_cDistParam); + uiBestSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift); + + for (int i = 0; i < m_uniMvListSize; i++) + { + BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + + int j = 0; + for (; j < i; j++) + { + BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred]) + { + break; + } + } + if (j < i) + continue; + + cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred]; + clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor; + + Distortion uiSad = m_cDistParam.distFunc(m_cDistParam); + uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift); + if (uiSad < uiBestSad) + { + uiBestSad = uiSad; + bestInitMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred]; + m_cDistParam.maximumDistortionForEarlyExit = uiSad; + } + } + if( !bQTBTMV ) { - xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange + xSetSearchRange(pu, bestInitMv, iSrchRng, cStruct.searchRange , cStruct ); } - cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 
2 : 0; xPatternSearch( cStruct, rcMv, ruiCost); } else if( bQTBTMV2 ) @@ -2999,7 +3430,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 : ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0; - xTZSearch( pu, cStruct, rcMv, ruiCost, NULL, false, true ); + xTZSearch(pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiCost, NULL, false, true); } else { @@ -3007,7 +3438,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0; rcMv = rcMvPred; const Mv *pIntegerMv2Nx2NPred = 0; - xPatternSearchFast( pu, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred ); + xPatternSearchFast(pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred); if( blkCache ) { blkCache->setMv( pu.cs->area, eRefPicList, iRefIdxPred, rcMv ); @@ -3020,7 +3451,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), pu.cu->slice->getPOC(), 0, ( int ) eRefPicList, ( int ) bBi, pu.Y().x, pu.Y().y, pu.Y().width, pu.Y().height, ruiCost ); // sub-pel refinement for sub-pel resolution - if( pu.cu->imv == 0 ) + if ( pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL ) { if( m_pcEncCfg->getMCTSEncConstraint() ) { @@ -3043,9 +3474,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer(), cStruct.imvShift ); ruiBits += uiMvBits; ruiCost = ( Distortion ) ( floor( fWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( 
uiMvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) ); + rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); } else // integer refinement for integer-pel and 4-pel resolution { + rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); xPatternSearchIntRefine( pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight); } DTRACE(g_trace_ctx, D_ME, " MECost<L%d,%d>: %6d (%d) MV:%d,%d\n", (int)eRefPicList, (int)bBi, ruiCost, ruiBits, rcMv.getHor() << 2, rcMv.getVer() << 2); @@ -3062,10 +3495,7 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu, { const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL; Mv cFPMvPred = cMvPred; - cFPMvPred.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv( cFPMvPred, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); + clipMv( cFPMvPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); Mv mvTL(cFPMvPred.getHor() - (iSrchRng << iMvShift), cFPMvPred.getVer() - (iSrchRng << iMvShift)); Mv mvBR(cFPMvPred.getHor() + (iSrchRng << iMvShift), cFPMvPred.getVer() + (iSrchRng << iMvShift)); @@ -3079,10 +3509,14 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu, { xClipMv( mvTL, pu.cu->lumaPos(), pu.cu->lumaSize(), - *pu.cs->sps ); + *pu.cs->sps + , *pu.cs->pps + ); xClipMv( mvBR, pu.cu->lumaPos(), pu.cu->lumaSize(), - *pu.cs->sps ); + *pu.cs->sps + , *pu.cs->pps + ); } mvTL.divideByPowerOf2( iMvShift ); @@ -3168,6 +3602,8 @@ void InterSearch::xPatternSearch( IntTZSearchStruct& cStruct, void InterSearch::xPatternSearchFast( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, @@ -3176,15 +3612,15 @@ void InterSearch::xPatternSearchFast( const PredictionUnit& pu, switch ( m_motionEstimationSearchMethod ) { case MESEARCH_DIAMOND: - xTZSearch ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false ); + xTZSearch ( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, 
pIntegerMv2Nx2NPred, false ); break; case MESEARCH_SELECTIVE: - xTZSearchSelective( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred ); + xTZSearchSelective( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred ); break; case MESEARCH_DIAMOND_ENHANCED: - xTZSearch ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true ); + xTZSearch ( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true ); break; case MESEARCH_FULL: // shouldn't get here. @@ -3195,6 +3631,8 @@ void InterSearch::xPatternSearchFast( const PredictionUnit& pu, void InterSearch::xTZSearch( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, @@ -3226,15 +3664,14 @@ void InterSearch::xTZSearch( const PredictionUnit& pu, const bool bNewZeroNeighbourhoodTest = bExtendedSettings; int iSearchRange = m_iSearchRange; - rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); if( m_pcEncCfg->getMCTSEncConstraint() ) { MCTSHelper::clipMvToArea( rcMv, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps ); } else - clipMv( rcMv, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); + { + clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + } rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); rcMv.divideByPowerOf2(2); @@ -3273,9 +3710,9 @@ void InterSearch::xTZSearch( const PredictionUnit& pu, MCTSHelper::clipMvToArea( integerMv2Nx2NPred, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps ); } else - clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); + { + clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + } integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); integerMv2Nx2NPred.divideByPowerOf2(2); @@ -3286,26 +3723,56 @@ void InterSearch::xTZSearch( const PredictionUnit& pu, xTZSearchHelp( cStruct, 
integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); } } + + for (int i = 0; i < m_uniMvListSize; i++) + { + BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + + int j = 0; + for (; j < i; j++) + { + BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred]) + { + break; + } + } + if (j < i) + continue; + + Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred]; + clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor; + + Distortion uiSad = m_cDistParam.distFunc(m_cDistParam); + uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift); + if (uiSad < cStruct.uiBestSad) + { + cStruct.uiBestSad = uiSad; + cStruct.iBestX = cTmpMv.hor; + cStruct.iBestY = cTmpMv.ver; + m_cDistParam.maximumDistortionForEarlyExit = uiSad; + } + } + { // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); - currBestMv <<= 2; + currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL; xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 
1 : 0), sr , cStruct ); } - if (m_pcEncCfg->getUseHashME()) + if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0)) { - int width = pu.cu->lumaSize().width; - int height = pu.cu->lumaSize().height; - if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8)) + int minSize = min(pu.cu->lumaSize().width, pu.cu->lumaSize().height); + if (minSize < 128 && minSize >= 4) { - Mv otherMvps[5]; - int numberOfOtherMvps; - numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps); + int numberOfOtherMvps = m_numHashMVStoreds[m_currRefPicList][m_currRefPicIndex]; for (int i = 0; i < numberOfOtherMvps; i++) { - xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0); + xTZSearchHelp(cStruct, m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getHor(), m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getVer(), 0, 0); } if (numberOfOtherMvps > 0) { @@ -3505,6 +3972,8 @@ void InterSearch::xTZSearch( const PredictionUnit& pu, void InterSearch::xTZSearchSelective( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv &rcMv, Distortion &ruiSAD, @@ -3525,10 +3994,7 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu, int iStartX = 0; int iStartY = 0; int iDist = 0; - rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv( rcMv, pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); + clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); rcMv.divideByPowerOf2(2); @@ -3556,15 +4022,46 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu, { Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred; integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL); - clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), - 
pu.cu->lumaSize(), - *pu.cs->sps ); + clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); integerMv2Nx2NPred.divideByPowerOf2(2); xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); } + + for (int i = 0; i < m_uniMvListSize; i++) + { + BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + + int j = 0; + for (; j < i; j++) + { + BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred]) + { + break; + } + } + if (j < i) + continue; + + Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred]; + clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); + m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor; + + Distortion uiSad = m_cDistParam.distFunc(m_cDistParam); + uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift); + if (uiSad < cStruct.uiBestSad) + { + cStruct.uiBestSad = uiSad; + cStruct.iBestX = cTmpMv.hor; + cStruct.iBestY = cTmpMv.ver; + m_cDistParam.maximumDistortionForEarlyExit = uiSad; + } + } + { // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); @@ -3573,21 +4070,16 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu, , cStruct ); } - - if (m_pcEncCfg->getUseHashME()) + if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0)) { - int width = pu.cu->lumaSize().width; - int height = pu.cu->lumaSize().height; - if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8)) + int minSize 
= min(pu.cu->lumaSize().width, pu.cu->lumaSize().height); + if (minSize < 128 && minSize >= 4) { - Mv otherMvps[5]; - int numberOfOtherMvps; - numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps); + int numberOfOtherMvps = m_numHashMVStoreds[m_currRefPicList][m_currRefPicIndex]; for (int i = 0; i < numberOfOtherMvps; i++) { - xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0); + xTZSearchHelp(cStruct, m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getHor(), m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getVer(), 0, 0); } - if (numberOfOtherMvps > 0) { // write out best match @@ -3676,15 +4168,12 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu, void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight) { - CHECK( pu.cu->imv == 0, "xPatternSearchIntRefine(): IMV not used."); + CHECK( pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used."); CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue."); const SPS &sps = *pu.cs->sps; - m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass ); + m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cs->slice->getDisableSATDForRD()); - // input MV rcMV has integer resolution - // -> shift it to QPEL - rcMv <<= 2; // -> set MV scale for cost calculation to QPEL (0) m_pcRdCost->setCostScale ( 0 ); @@ -3697,7 +4186,7 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& Mv cBaseMvd[2]; int iBestBits = 0; int iBestMVPIdx = riMVPIdx; - int 
testPos[9][2] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} }; + Mv testPos[9] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} }; cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]); @@ -3705,10 +4194,8 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& CHECK( (cBaseMvd[0].getHor() & 0x03) != 0 || (cBaseMvd[0].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue."); CHECK( (cBaseMvd[1].getHor() & 0x03) != 0 || (cBaseMvd[1].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue."); - cBaseMvd[0].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv); - cBaseMvd[1].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv); - - int mvOffset = 1 << cStruct.imvShift; + cBaseMvd[0].roundTransPrecInternal2Amvr(pu.cu->imv); + cBaseMvd[1].roundTransPrecInternal2Amvr(pu.cu->imv); // test best integer position and all 8 neighboring positions for (int pos = 0; pos < 9; pos ++) @@ -3717,7 +4204,8 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& // test both AMVP candidates for each position for (int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++) { - cTestMv[iMVPIdx].set(testPos[pos][0]*mvOffset, testPos[pos][1]*mvOffset); + cTestMv[iMVPIdx] = testPos[pos]; + cTestMv[iMVPIdx].changeTransPrecAmvr2Internal(pu.cu->imv); cTestMv[iMVPIdx] += cBaseMvd[iMVPIdx]; cTestMv[iMVPIdx] += amvpInfo.mvCand[iMVPIdx]; @@ -3725,9 +4213,7 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& if( m_pcEncCfg->getMCTSEncConstraint() ) { Mv cTestMVRestr = cTestMv[iMVPIdx]; - cTestMVRestr.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); MCTSHelper::clipMvToArea( cTestMVRestr, pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaIntPelRestricted( pu ), *pu.cs->sps ); - cTestMVRestr.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); if( cTestMVRestr != cTestMv[iMVPIdx] ) { 
@@ -3740,13 +4226,9 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& Mv cTempMV = cTestMv[iMVPIdx]; if( !m_pcEncCfg->getMCTSEncConstraint() ) { - cTempMV.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv(cTempMV, pu.cu->lumaPos(), - pu.cu->lumaSize(), - sps); - cTempMV.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + clipMv( cTempMV, pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps ); } - m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * (cTempMV.getVer() >> 2) + (cTempMV.getHor() >> 2); + m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * (cTempMV.getVer() >> MV_FRACTIONAL_BITS_INTERNAL) + (cTempMV.getHor() >> MV_FRACTIONAL_BITS_INTERNAL); uiDist = uiSATD = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight); } else @@ -3755,9 +4237,13 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& } int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; - m_pcRdCost->setPredictor( amvpInfo.mvCand[iMVPIdx] ); - iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift ); - uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift ); + Mv pred = amvpInfo.mvCand[iMVPIdx]; + pred.changeTransPrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor( pred ); + Mv mv = cTestMv[iMVPIdx]; + mv.changeTransPrecInternal2Amvr(pu.cu->imv); + iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 ); + uiDist += m_pcRdCost->getCost(iMvBits); if (uiDist < uiBestDist) { @@ -3787,7 +4273,6 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct& ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits); // taken from JEM 5.0 // verify since it makes no sense to add rate for MVDs twicce - ruiBits += 
m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), cStruct.imvShift); return; } @@ -3803,7 +4288,6 @@ void InterSearch::xPatternSearchFracDIF( Distortion& ruiCost ) { - const bool bIsLosslessCoded = pu.cu->transQuantBypass; // Reference pattern initialization (integer scale) int iOffset = rcMvInt.getHor() + rcMvInt.getVer() * cStruct.iRefStride; @@ -3813,16 +4297,16 @@ void InterSearch::xPatternSearchFracDIF( Mv baseRefMv(0, 0); rcMvHalf.setZero(); m_pcRdCost->setCostScale(0); - xExtDIFUpSamplingH(&cPatternRoi); + xExtDIFUpSamplingH(&cPatternRoi, cStruct.useAltHpelIf); rcMvQter = rcMvInt; rcMvQter <<= 2; // for mv-cost - ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded); + ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !pu.cs->slice->getDisableSATDForRD()); return; } - if (cStruct.imvShift || (m_useCompositeRef && cStruct.zeroMV)) + if (cStruct.imvShift > IMV_FPEL || (m_useCompositeRef && cStruct.zeroMV)) { - m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded ); + m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cs->slice->getDisableSATDForRD()); ruiCost = m_cDistParam.distFunc( m_cDistParam ); ruiCost += m_pcRdCost->getCostOfVectorWithPredictor( rcMvInt.getHor(), rcMvInt.getVer(), cStruct.imvShift ); return; @@ -3830,13 +4314,15 @@ void InterSearch::xPatternSearchFracDIF( // Half-pel refinement m_pcRdCost->setCostScale(1); - xExtDIFUpSamplingH ( &cPatternRoi ); + xExtDIFUpSamplingH(&cPatternRoi, cStruct.useAltHpelIf); rcMvHalf = rcMvInt; rcMvHalf <<= 1; // for mv-cost Mv baseRefMv(0, 0); - ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded); + ruiCost = 
xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!pu.cs->slice->getDisableSATDForRD())); // quarter-pel refinement + if (cStruct.imvShift == IMV_OFF) + { m_pcRdCost->setCostScale( 0 ); xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf ); baseRefMv = rcMvHalf; @@ -3844,10 +4330,11 @@ void InterSearch::xPatternSearchFracDIF( rcMvQter = rcMvInt; rcMvQter <<= 1; // for mv-cost rcMvQter += rcMvHalf; rcMvQter <<= 1; - ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded ); + ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!pu.cs->slice->getDisableSATDForRD())); + } } -Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx ) +Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int bcwIdx ) { Distortion cost = std::numeric_limits<Distortion>::max(); RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList); @@ -3856,32 +4343,51 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB PelUnitBuf predBufA = m_tmpPredStorage[eCurRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); const Picture* picRefA = pu.cu->slice->getRefPic( eCurRefPicList, cCurMvField.refIdx ); Mv mvA = cCurMvField.mv; - mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); - xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 ) + { + Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 ); + CPelBuf pelBufA = picRefA->getRecoBuf( CompArea( COMPONENT_Y, 
pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false ); + predBufA.bufs[0].buf = const_cast<Pel *>(pelBufA.buf); + predBufA.bufs[0].stride = pelBufA.stride; + predBufA.bufs[0].width = pelBufA.width; + predBufA.bufs[0].height = pelBufA.height; + } + else + { + xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + } // get prediction of eTarRefPicList PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); const Picture* picRefB = pu.cu->slice->getRefPic( eTarRefPicList, cTarMvField.refIdx ); Mv mvB = cTarMvField.mv; - mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); - xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 ) + { + Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 ); + CPelBuf pelBufB = picRefB->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false ); + predBufB.bufs[0].buf = const_cast<Pel *>(pelBufB.buf); + predBufB.bufs[0].stride = pelBufB.stride; + } + else + { + xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + } PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); - if (gbiIdx != GBI_DEFAULT) - bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); - else - bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) ); + bufTmp.copyFrom( origBuf ); + bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, eTarRefPicList ) ); + double fWeight = 
xGetMEDistortionWeight( pu.cu->BcwIdx, eTarRefPicList ); // calc distortion - cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); - + DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; + cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) ); return(cost); } Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred - , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int gbiIdx ) + , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int bcwIdx ) { const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 , 0 ) }; const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 , 1 ) , Mv( 1 , 0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) }; @@ -3949,9 +4455,13 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& continue; // Skip this this pos } // get MVD cost - m_pcRdCost->setPredictor( rcMvCurPred ); + Mv pred = rcMvCurPred; + pred.changeTransPrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor( pred ); m_pcRdCost->setCostScale( 0 ); - uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvCand.mv.getHor(), mvCand.mv.getVer(), (pu.cu->imv << 1) ); + Mv mv = mvCand.mv; + mv.changeTransPrecInternal2Amvr(pu.cu->imv); + uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 ); Distortion uiCost = m_pcRdCost->getCost( uiMvBits ); // get MVD pair and set target MV @@ -3962,7 +4472,7 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, 
PelUnitBuf& if( !( MCTSHelper::checkMvForMCTSConstraint( pu, mvPair.mv ) ) ) continue; // Skip this this pos } - uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, gbiIdx ); + uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, bcwIdx ); if ( uiCost < uiMinCost ) { uiMinCost = uiCost; @@ -3989,18 +4499,18 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& } -void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx ) +void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx ) { // Refine Search - int nSearchStepShift = 0; + int nSearchStepShift = MV_FRACTIONAL_BITS_DIFF; int nDiamondRound = 8; int nCrossRound = 1; - nSearchStepShift += (pu.cu->imv << 1); + nSearchStepShift += pu.cu->imv == IMV_HPEL ? 
1 : (pu.cu->imv << 1); nDiamondRound >>= pu.cu->imv; - ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, gbiIdx ); - ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, gbiIdx ); + ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, bcwIdx ); + ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, bcwIdx ); } void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, @@ -4011,9 +4521,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, Mv hevcMv[2][33] , Mv mvAffine4Para[2][33][3] , int refIdx4Para[2] - , uint8_t gbiIdx - , bool enforceGBiPred - , uint32_t gbiIdxBits + , uint8_t bcwIdx + , bool enforceBcwPred + , uint32_t bcwIdxBits ) { const Slice &slice = *pu.cu->slice; @@ -4071,7 +4581,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uint32_t bitsValidList1 = MAX_UINT; Distortion costValidList1 = std::numeric_limits<Distortion>::max(); Mv mvHevc[3]; - const bool changeToHighPrec = pu.cu->imv != 1; const bool affineAmvrEnabled = pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag(); int tryBipred = 0; WPScalingParam *wp0; @@ -4080,16 +4589,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.cu->affine = true; pu.mergeFlag = false; - - if( gbiIdx != GBI_DEFAULT ) + pu.regularMergeFlag = false; + if( bcwIdx != BCW_DEFAULT ) { - pu.cu->GBiIdx = gbiIdx; + pu.cu->BcwIdx = bcwIdx; } // Uni-directional prediction for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); + pu.interDir = ( iRefList ? 
2 : 1 ); for (int iRefIdxTemp = 0; iRefIdxTemp < slice.getNumRefIdx(eRefPicList); iRefIdxTemp++) { // Get RefIdx bits @@ -4121,14 +4631,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, for ( int i=0; i<3; i++ ) { mvHevc[i] = hevcMv[iRefList][iRefIdxTemp]; - if ( pu.cu->imv == 1 ) - { - mvHevc[i].changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); - } - else if ( pu.cu->imv == 2 ) - { - mvHevc[i].roundToPrecision( MV_PRECISION_QUARTER, MV_PRECISION_INT ); - } + mvHevc[i].roundAffinePrecInternal2Amvr(pu.cu->imv); } PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) ); @@ -4151,11 +4654,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, for ( int i = 0; i < mvNum; i++ ) { mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i]; - if ( pu.cu->imv != 1 ) - { - mvFour[i].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); - mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); - } + mvFour[i].roundAffinePrecInternal2Amvr(pu.cu->imv); } Distortion candCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp ); @@ -4169,7 +4668,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } if (pu.cu->affineType == AFFINEMODEL_4PARAM && m_affMVListSize - && (!pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT) + && (!pu.cu->cs->sps->getUseBcw() || bcwIdx == BCW_DEFAULT) ) { int shift = MAX_CU_DEPTH; @@ -4199,11 +4698,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, int mvScaleHor = nbMv[0].getHor() << shift; int mvScaleVer = nbMv[0].getVer() << shift; Mv dMv = nbMv[1] - nbMv[0]; - mvScaleHor <<= MV_FRACTIONAL_BITS_DIFF; - mvScaleVer <<= MV_FRACTIONAL_BITS_DIFF; - dMv <<= MV_FRACTIONAL_BITS_DIFF; - dMvHorX = dMv.getHor() << (shift - g_aucLog2[mvInfo->w]); - dMvHorY 
= dMv.getVer() << (shift - g_aucLog2[mvInfo->w]); + dMvHorX = dMv.getHor() << (shift - floorLog2(mvInfo->w)); + dMvHorY = dMv.getVer() << (shift - floorLog2(mvInfo->w)); dMvVerX = -dMvHorY; dMvVerY = dMvHorX; vx = mvScaleHor + dMvHorX * (pu.Y().x - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); @@ -4211,29 +4707,16 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, roundAffineMv(vx, vy, shift); mvTmp[0] = Mv(vx, vy); mvTmp[0].clipToStorageBitDepth(); - clipMv(mvTmp[0], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps); - if ( pu.cu->imv == 2 ) - { - mvTmp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } - else if ( pu.cu->imv == 0 ) - mvTmp[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + clipMv( mvTmp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + mvTmp[0].roundAffinePrecInternal2Amvr(pu.cu->imv); vx = mvScaleHor + dMvHorX * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y); vy = mvScaleVer + dMvHorY * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y); roundAffineMv(vx, vy, shift); mvTmp[1] = Mv(vx, vy); mvTmp[1].clipToStorageBitDepth(); - clipMv(mvTmp[1], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps); - if ( pu.cu->imv != 1 ) - { - mvTmp[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? 
MV_PRECISION_INT : MV_PRECISION_QUARTER ); - mvTmp[0].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); - mvTmp[1].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); - } + clipMv( mvTmp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + mvTmp[0].roundAffinePrecInternal2Amvr(pu.cu->imv); + mvTmp[1].roundAffinePrecInternal2Amvr(pu.cu->imv); Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp); if ( affineAmvrEnabled ) { @@ -4249,39 +4732,23 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { Mv mvFour[3]; - if ( pu.cu->imv != 1 ) - { - mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0]; mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1]; - if ( pu.cu->imv != 1 ) - { - mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } + mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(pu.cu->imv); + mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(pu.cu->imv); + int shift = MAX_CU_DEPTH; - int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()])); - int vy2 = (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()])); - vx2 >>= shift; - vy2 >>= shift; + int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + floorLog2(pu.lheight()) - floorLog2(pu.lwidth()))); + int vy2 
= (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + floorLog2(pu.lheight()) - floorLog2(pu.lwidth()))); + int offset = (1 << (shift - 1)); + vx2 = (vx2 + offset - (vx2 >= 0)) >> shift; + vy2 = (vy2 + offset - (vy2 >= 0)) >> shift; mvFour[2].hor = vx2; mvFour[2].ver = vy2; mvFour[2].clipToStorageBitDepth(); - if ( pu.cu->imv != 1 ) - { - mvFour[0].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); - mvFour[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); - mvFour[2].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER ); - } - for (int i = 0; i < 3; i++) - { - if ( pu.cu->imv != 1 ) - { - mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); - } - } + mvFour[0].roundAffinePrecInternal2Amvr(pu.cu->imv); + mvFour[1].roundAffinePrecInternal2Amvr(pu.cu->imv); + mvFour[2].roundAffinePrecInternal2Amvr(pu.cu->imv); Distortion uiCandCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp ); if ( affineAmvrEnabled ) { @@ -4307,7 +4774,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } // GPB list 1, save the best MvpIdx, RefIdx and Cost - if ( slice.getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist ) + if ( slice.getPicHeader()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist ) { bestBiPDist = biPDistTemp; bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp]; @@ -4344,7 +4811,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] ); } - if(pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB()) + if(pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx == BCW_DEFAULT && pu.cu->slice->isInterB()) { m_uniMotions.setReadModeAffine(true, 
(uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType); m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType @@ -4387,7 +4854,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if ( pu.cu->affineType == AFFINEMODEL_4PARAM ) { ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) ); - if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT ) ) + if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseBcw() || bcwIdx == BCW_DEFAULT ) ) { AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx; @@ -4421,7 +4888,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, // Bi-directional prediction if ( slice.isInterB() && !PU::isBipredRestriction(pu) ) { - tryBipred = 1; + tryBipred = 1; + pu.interDir = 3; + m_isBi = true; // Set as best list0 and list1 iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1]; @@ -4433,7 +4902,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uint32_t uiMotBits[2]; bool doBiPred = true; - if ( slice.getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp + if ( slice.getPicHeader()->getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp { xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1] ); pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1; @@ -4455,9 +4924,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, for( int i = 0; i < mvNum; i++ ) { Mv restrictedMv = pcMvTemp[i]; - restrictedMv.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL ); MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps ); - restrictedMv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER ); // If sub-pel filter samples are not inside of allowed area if( restrictedMv != pcMvTemp[i] ) @@ -4468,9 +4935,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } } // Get list1 prediction block - 
PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 - , changeToHighPrec - ); + PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1); pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(*pu.cu, pu) ); @@ -4503,7 +4968,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, // 4-times iteration (default) int iNumIter = 4; // fast encoder setting or GPB: only one iteration - if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getMvdL1ZeroFlag() ) + if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getPicHeader()->getMvdL1ZeroFlag() ) { iNumIter = 1; } @@ -4522,9 +4987,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { iRefList = 0; } - if( gbiIdx != GBI_DEFAULT ) + if( bcwIdx != BCW_DEFAULT ) { - iRefList = ( abs( getGbiWeight( gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight( gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 ); + iRefList = ( abs( getBcwWeight( bcwIdx, REF_PIC_LIST_0 ) ) > abs( getBcwWeight( bcwIdx, REF_PIC_LIST_1 ) ) ? 
1 : 0 ); } } else if ( iIter == 0 ) @@ -4533,11 +4998,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } // First iterate, get prediction block of opposite direction - if( iIter == 0 && !slice.getMvdL1ZeroFlag() ) + if( iIter == 0 && !slice.getPicHeader()->getMvdL1ZeroFlag() ) { - PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList) - , changeToHighPrec - ); + PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList)); pu.refIdx[1-iRefList] = iRefIdx[1-iRefList]; PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) ); @@ -4546,7 +5009,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); - if ( slice.getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0 + if ( slice.getPicHeader()->getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0 { iRefList = 0; eRefPicList = REF_PIC_LIST_0; @@ -4562,7 +5025,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { continue; } - if(m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT) + if(m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT) && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC()) && (pu.cu->affineType == AFFINEMODEL_4PARAM && pu.cu->slice->getTLayer()>1)) { @@ -4570,7 +5033,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } // update bits uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; - uiBitsTemp += ((pu.cu->slice->getSPS()->getUseGBi() == true) ? gbiIdxBits : 0); + uiBitsTemp += ((pu.cu->slice->getSPS()->getUseBcw() == true) ? 
bcwIdxBits : 0); if( slice.getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; @@ -4597,15 +5060,13 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; - uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseGBi() == true) ? gbiIdxBits : 0); + uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseBcw() == true) ? bcwIdxBits : 0); uiBits[2] = uiBitsTemp; if ( iNumIter != 1 ) // MC for next iter { // Set motion - PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList - , changeToHighPrec - ); + PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList); pu.refIdx[eRefPicList] = iRefIdxBi[eRefPicList]; PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) ); motionCompensation( pu, predBufTmp, eRefPicList ); @@ -4615,12 +5076,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, if ( !bChanged ) { - if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred) + if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred) { xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi ); - if ( !slice.getMvdL1ZeroFlag() ) + if ( !slice.getPicHeader()->getMvdL1ZeroFlag() ) { xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi ); @@ -4630,6 +5091,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } } // for loop-iter } + m_isBi = false; } // if (B_SLICE) pu.mv [REF_PIC_LIST_0] = Mv(); @@ -4654,20 
+5116,20 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, iRefIdx[1] = refIdxValidList1; uiBits[1] = bitsValidList1; uiCost[1] = costValidList1; - if (pu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT)) + if (pu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT)) { - CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); - CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); - pu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); - pu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); - if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag - || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)) - { - uiCostBi = MAX_UINT; - enforceGBiPred = false; - } + CHECK(iRefIdxBi[0]<0, "Invalid picture reference index"); + CHECK(iRefIdxBi[1]<0, "Invalid picture reference index"); + pu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0); + pu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1); + if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag + || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)) + { + uiCostBi = MAX_UINT; + enforceBcwPred = false; + } } - if( enforceGBiPred ) + if( enforceBcwPred ) { uiCost[0] = uiCost[1] = MAX_UINT; } @@ -4677,13 +5139,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { lastMode = 2; affineCost = uiCostBi; - - PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0 - , changeToHighPrec - ); - PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1 - , changeToHighPrec - ); + pu.interDir = 3; + PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0); + PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1); pu.refIdx[REF_PIC_LIST_0] = 
iRefIdxBi[0]; pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1]; @@ -4698,7 +5156,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, } } - pu.interDir = 3; pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]]; @@ -4709,10 +5166,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { lastMode = 0; affineCost = uiCost[0]; - - PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0 - , changeToHighPrec - ); + pu.interDir = 1; + PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0); pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0]; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) @@ -4723,7 +5178,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0]; } } - pu.interDir = 1; pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]]; pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]]; @@ -4732,10 +5186,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { lastMode = 1; affineCost = uiCost[1]; - - PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1 - , changeToHighPrec - ); + pu.interDir = 2; + PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1); pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1]; for ( int verIdx = 0; verIdx < mvNum; verIdx++ ) @@ -4746,18 +5198,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0]; } } - pu.interDir = 2; pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]]; pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]]; } - if( gbiIdx != GBI_DEFAULT ) + if( bcwIdx != BCW_DEFAULT ) { - pu.cu->GBiIdx = GBI_DEFAULT; + pu.cu->BcwIdx = BCW_DEFAULT; } } -void solveEqual( double** dEqualCoeff, int iOrder, double* dAffinePara ) +void solveEqual(double dEqualCoeff[7][7], int iOrder, double *dAffinePara) { for ( int k = 0; k < iOrder; 
k++ ) { @@ -4837,7 +5288,7 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin int mvNum = pu.cu->affineType ? 3 : 2; - m_pcRdCost->selectMotionLambda( pu.cu->transQuantBypass ); + m_pcRdCost->selectMotionLambda( ); m_pcRdCost->setCostScale ( 0 ); int iBestMVPIdx = riMVPIdx; @@ -4894,7 +5345,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, const AffineAMVPInfo& aamvpi, bool bBi) { - if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost + if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost , mvpIdx, aamvpi ) ) { @@ -4913,6 +5364,8 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, double fWeight = 1.0; PelUnitBuf origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); + enum DFunc distFunc = (pu.cs->slice->getDisableSATDForRD()) ? 
DF_SAD : DF_HAD; + m_iRefListIdx = eRefPicList; // if Bi, set to ( 2 * Org - ListX ) if ( bBi ) @@ -4921,11 +5374,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); origBufTmp.copyFrom(origBuf); origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() - ,getGbiWeight(pu.cu->GBiIdx, eRefPicList) + ,getBcwWeight(pu.cu->BcwIdx, eRefPicList) ); pBuf = &origBufTmp; - fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList ); + fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eRefPicList ); } // pred YUV @@ -4934,23 +5387,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, // Set start Mv position, use input mv as started search mv Mv acMvTemp[3]; ::memcpy( acMvTemp, acMv, sizeof(Mv)*3 ); - if ( pu.cu->imv != 1 ) - { - acMvTemp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - acMvTemp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - acMvTemp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } // Set delta mv // malloc buffer int iParaNum = pu.cu->affineType ? 7 : 5; int affineParaNum = iParaNum - 1; int mvNum = pu.cu->affineType ? 3 : 2; - double **pdEqualCoeff; - pdEqualCoeff = new double *[iParaNum]; - for ( int i = 0; i < iParaNum; i++ ) - { - pdEqualCoeff[i] = new double[iParaNum]; - } + double pdEqualCoeff[7][7]; int64_t i64EqualCoeff[7][7]; Pel *piError = m_tmpAffiError; @@ -4973,32 +5415,24 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, } } else - clipMv( acMvTemp[0], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); - clipMv( acMvTemp[1], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); - if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - clipMv( acMvTemp[2], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps ); - } - int mvdPrecision = ( pu.cu->imv == 1 ) ? 
2 : 0; - if ( pu.cu->imv == 2 ) - { - acMvTemp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - acMvTemp[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) + clipMv( acMvTemp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + clipMv( acMvTemp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + if( pu.cu->affineType == AFFINEMODEL_6PARAM ) { - acMvTemp[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); + clipMv( acMvTemp[2], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); } } + acMvTemp[0].roundAffinePrecInternal2Amvr(pu.cu->imv); + acMvTemp[1].roundAffinePrecInternal2Amvr(pu.cu->imv); + if (pu.cu->affineType == AFFINEMODEL_6PARAM) + { + acMvTemp[2].roundAffinePrecInternal2Amvr(pu.cu->imv); + } xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) ); // get error - uiCostBest = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD ); + uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); // get cost with mv m_pcRdCost->setCostScale(0); @@ -5013,7 +5447,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, else { DTRACE( g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest ); - uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); + uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred ); DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest ); } uiCostBest = (Distortion)( floor( fWeight * (double)uiCostBest ) + (double)m_pcRdCost->getCost( uiBitsBest ) ); @@ -5090,7 +5524,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, } double dAffinePara[6]; - double dDeltaMv[6]; + double 
dDeltaMv[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0,}; Mv acDeltaMv[3]; solveEqual( pdEqualCoeff, affineParaNum, dAffinePara ); @@ -5110,12 +5544,13 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0]; dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2]; } - int mvShift = MV_FRACTIONAL_BITS_DIFF - mvdPrecision; - int multiShift = 1 << ( MV_FRACTIONAL_BITS_DIFF + mvdPrecision ); + const int normShiftTab[3] = { MV_PRECISION_QUARTER - MV_PRECISION_INT, MV_PRECISION_SIXTEENTH - MV_PRECISION_INT, MV_PRECISION_QUARTER - MV_PRECISION_INT }; + const int stepShiftTab[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER }; + const int multiShift = 1 << normShiftTab[pu.cu->imv]; + const int mvShift = stepShiftTab[pu.cu->imv]; acDeltaMv[0] = Mv( ( int ) ( dDeltaMv[0] * multiShift + SIGN( dDeltaMv[0] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[2] * multiShift + SIGN( dDeltaMv[2] ) * 0.5 ) << mvShift ); acDeltaMv[1] = Mv( ( int ) ( dDeltaMv[1] * multiShift + SIGN( dDeltaMv[1] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[3] * multiShift + SIGN( dDeltaMv[3] ) * 0.5 ) << mvShift ); - if ( pu.cu->affineType == AFFINEMODEL_6PARAM ) { acDeltaMv[2] = Mv( ( int ) ( dDeltaMv[4] * multiShift + SIGN( dDeltaMv[4] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[5] * multiShift + SIGN( dDeltaMv[5] ) * 0.5 ) << mvShift ); @@ -5145,25 +5580,16 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, for ( int i = 0; i < mvNum; i++ ) { acMvTemp[i] += acDeltaMv[i]; - acMvTemp[i].hor = Clip3( -131072, 131071, acMvTemp[i].hor ); - acMvTemp[i].ver = Clip3( -131072, 131071, acMvTemp[i].ver ); - if ( pu.cu->imv == 0 ) - { - acMvTemp[i].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } - else if ( pu.cu->imv == 2 ) - { - acMvTemp[i].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); - } + acMvTemp[i].hor = Clip3(MV_MIN, MV_MAX, 
acMvTemp[i].hor ); + acMvTemp[i].ver = Clip3(MV_MIN, MV_MAX, acMvTemp[i].ver ); + acMvTemp[i].roundAffinePrecInternal2Amvr(pu.cu->imv); if( m_pcEncCfg->getMCTSEncConstraint() ) { MCTSHelper::clipMvToArea( acMvTemp[i], pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ), *pu.cs->sps ); } else { - clipMv(acMvTemp[i], pu.cu->lumaPos(), - pu.cu->lumaSize(), - *pu.cs->sps); + clipMv( acMvTemp[i], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); } } @@ -5190,7 +5616,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) ); // get error - Distortion uiCostTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD ); + Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp ); // get cost with mv @@ -5205,7 +5631,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, } else { - uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); + uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred ); } uiCostTemp = (Distortion)( floor( fWeight * (double)uiCostTemp ) + (double)m_pcRdCost->getCost( uiBitsTemp ) ); @@ -5223,11 +5649,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, { xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); // get error - Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); + Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); // get cost with mv m_pcRdCost->setCostScale(0); uint32_t bitsTemp 
= ruiBits; - bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred, pu.cu->imv != 1 ); + bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred ); costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp)); // store best cost and mv if (costTemp < uiCostBest) @@ -5238,63 +5664,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, } }; + const uint32_t mvShiftTable[3] = {MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_INTERNAL, MV_PRECISION_INTERNAL - MV_PRECISION_INT}; + const uint32_t mvShift = mvShiftTable[pu.cu->imv]; if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost) { - //search 8 nearest neighbors; integer distance - int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } }; - const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF ); - const int maxSearchRound = 3; - - if ( m_pcEncCfg->getUseAffineAmvrEncOpt() && m_pcEncCfg->getIntraPeriod() != ( uint32_t ) -1 && pu.cu->imv ) - { - for ( int rnd = 0; rnd < ( pu.cu->slice->getTLayer() <= 2 ? maxSearchRound : maxSearchRound - 1 ); rnd++ ) - { - bool modelChange = false; - //search the model parameters with finear granularity; - for ( int j = 0; j < mvNum; j++ ) - { - for ( int iter = 0; iter < 2; iter++ ) - { - Mv centerMv[3]; - memcpy( centerMv, acMv, sizeof( Mv ) * 3 ); - memcpy( acMvTemp, acMv, sizeof( Mv ) * 3 ); - for ( int i = ( iter ? 0: 4 ); i < ( iter ? 
4 : 8 ); i++ ) - { - acMvTemp[j].set( centerMv[j].getHor() + ( testPos[i][0] << mvShift ), centerMv[j].getVer() + ( testPos[i][1] << mvShift ) ); - - clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps ); - xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) ); - - Distortion costTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, DF_HAD ); - uint32_t bitsTemp = ruiBits; - bitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 ); - costTemp = ( Distortion ) ( floor( fWeight * ( double ) costTemp ) + ( double ) m_pcRdCost->getCost( bitsTemp ) ); - - if ( costTemp < uiCostBest ) - { - uiCostBest = costTemp; - uiBitsBest = bitsTemp; - ::memcpy( acMv, acMvTemp, sizeof( Mv ) * 3 ); - modelChange = true; - } - } - } - } - - if ( !modelChange ) - { - break; - } - } - } Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] }; - if ( pu.cu->imv != 1 ) - { - mvPredTmp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - mvPredTmp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - mvPredTmp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - } Mv mvME[3]; ::memcpy(mvME, acMv, sizeof(Mv) * 3); Mv dMv = mvME[0] - mvPredTmp[0]; @@ -5337,42 +5712,63 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, checkCPMVRdCost(acMvTemp); } + // 8 nearest neighbor search + int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } }; + const int maxSearchRound = (pu.cu->imv) ? 3 : ((m_pcEncCfg->getUseAffineAmvrEncOpt() && m_pcEncCfg->getIntraPeriod() == (uint32_t)-1) ? 
2 : 3); + + for (int rnd = 0; rnd < maxSearchRound; rnd++) { - dMv = acMv[1] - acMv[0]; - if (pu.cu->affineType == AFFINEMODEL_4PARAM && (dMv.getAbsHor() > 4 || dMv.getAbsVer() > 4)) + bool modelChange = false; + //search the model parameters with finear granularity; + for (int j = 0; j < mvNum; j++) { - int testPos[4][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 } }; - Mv centerMv[3]; - const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF ); - ::memcpy(centerMv, acMv, sizeof(Mv) * 3); - acMvTemp[0] = centerMv[0]; - for (int i = 0; i < 4; i++) + bool loopChange = false; + for (int iter = 0; iter < 2; iter++) { - acMvTemp[1].set( centerMv[1].getHor() + ( testPos[i][0] << mvShift ), centerMv[1].getVer() + ( testPos[i][1] << mvShift ) ); - checkCPMVRdCost(acMvTemp); + if (iter == 1 && !loopChange) + { + break; + } + Mv centerMv[3]; + memcpy(centerMv, acMv, sizeof(Mv) * 3); + memcpy(acMvTemp, acMv, sizeof(Mv) * 3); + + for (int i = ((iter == 0) ? 0 : 4); i < ((iter == 0) ? 
4 : 8); i++) + { + acMvTemp[j].set(centerMv[j].getHor() + (testPos[i][0] << mvShift), centerMv[j].getVer() + (testPos[i][1] << mvShift)); + clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); + + Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc); + uint32_t bitsTemp = ruiBits; + bitsTemp += xCalcAffineMVBits(pu, acMvTemp, acMvPred); + costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp)); + + if (costTemp < uiCostBest) + { + uiCostBest = costTemp; + uiBitsBest = bitsTemp; + ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3); + modelChange = true; + loopChange = true; + } + } } } + + if (!modelChange) + { + break; + } } } - if ( pu.cu->imv != 1 ) - { - acMv[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - acMv[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - acMv[2].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); - } acMvPred[0] = aamvpi.mvCandLT[mvpIdx]; acMvPred[1] = aamvpi.mvCandRT[mvpIdx]; acMvPred[2] = aamvpi.mvCandLB[mvpIdx]; - // free buffer - for (int i = 0; i<iParaNum; i++) - delete[]pdEqualCoeff[i]; - delete[]pdEqualCoeff; - ruiBits = uiBitsBest; ruiCost = uiCostBest; DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest ); - } void InterSearch::xEstimateAffineAMVP( PredictionUnit& pu, @@ -5438,7 +5834,7 @@ void InterSearch::xCopyAffineAMVPInfo (AffineAMVPInfo& src, AffineAMVPInfo& dst) * \param pattern Reference picture ROI * \param biPred Flag indicating whether block is for biprediction */ -void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern ) +void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf) { const ClpRng& clpRng = m_lumaClpRng; int width 
= pattern->width; @@ -5455,15 +5851,15 @@ void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern ) const ChromaFormat chFmt = m_currChromaFormat; - m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); + m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf); if (!m_skipFracME) { - m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng); + m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf); } intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1; dstPtr = m_filteredBlock[0][0][0]; - m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); + m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf); if (m_skipFracME) { return; @@ -5471,15 +5867,15 @@ void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern ) intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1; dstPtr = m_filteredBlock[2][0][0]; - m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); + m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf); intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][2][0]; - m_if.filterVer(COMPONENT_Y, 
intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); + m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf); intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride; dstPtr = m_filteredBlock[2][2][0]; - m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng); + m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf); } @@ -5701,7 +6097,7 @@ void InterSearch::setWpScalingDistParam( int iRefIdx, RefPicList eRefPicListCur, void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID) { const UnitArea& currArea = partitioner.currArea(); - const TransformUnit &currTU = *cs.getTU(currArea.lumaPos(), partitioner.chType); + const TransformUnit &currTU = *cs.getTU(isLuma(partitioner.chType) ? 
currArea.lumaPos() : currArea.chromaPos(), partitioner.chType); const CodingUnit &cu = *currTU.cu; const unsigned currDepth = partitioner.currTrDepth; @@ -5724,26 +6120,27 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti CHECK(CU::isIntra(cu), "Inter search provided with intra CU"); - if( cu.chromaFormat != CHROMA_400 ) + if( cu.chromaFormat != CHROMA_400 + && (!cu.isSepTree() || isChroma(partitioner.chType)) + ) { - const bool firstCbfOfCU = ( currDepth == 0 ); { - if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ); - if( !( cu.sbtInfo && currDepth == 1 ) ) + if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual)))) m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth ); } - if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth - 1 ) ) { const bool chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth ); - if( !( cu.sbtInfo && currDepth == 1 ) ) + if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual)))) m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) ); } } } - if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) ) + if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) + && !isChroma(partitioner.chType) + ) { m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currArea.Y(), currDepth ); } @@ -5755,6 +6152,11 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti { if( currArea.blocks[compID].valid() ) { + if( compID == COMPONENT_Cr ) + { + const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 
1 : 0 ); + m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); + } if( TU::hasCrossCompPredInfo( currTU, compID ) ) { m_CABACEstimator->cross_comp_pred( currTU, compID ); @@ -6038,14 +6440,18 @@ uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/ , const bool luma, const bool chroma + , PelUnitBuf* orgResi ) { const UnitArea& currArea = partitioner.currArea(); const SPS &sps = *cs.sps; + m_pcRdCost->setChromaFormat(sps.getChromaFormatIdc()); + const uint32_t numValidComp = getNumberValidComponents( sps.getChromaFormatIdc() ); const uint32_t numTBlocks = getNumberValidTBlocks ( *cs.pcv ); const CodingUnit &cu = *cs.getCU(partitioner.chType); const unsigned currDepth = partitioner.currTrDepth; + const bool colorTransFlag = cs.cus[0]->colorTransform; bool bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) ) @@ -6068,6 +6474,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par Distortion uiSingleDist = 0; Distortion uiSingleDistComp [3] = { 0, 0, 0 }; + uint64_t uiSingleFracBits[3] = { 0, 0, 0 }; TCoeff uiAbsSum [3] = { 0, 0, 0 }; const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); @@ -6075,23 +6482,19 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par if (bCheckFull) { - TransformUnit &tu = csFull->addTU(CS::isDualITree(cs) ? 
cu : currArea, partitioner.chType); + TransformUnit &tu = csFull->addTU(CS::getArea(cs, currArea, partitioner.chType), partitioner.chType); tu.depth = currDepth; - tu.mtsIdx = 0; + for (int i = 0; i<MAX_NUM_TBLOCKS; i++) tu.mtsIdx[i] = MTS_DCT2_DCT2; tu.checkTuNoResidual( partitioner.currPartIdx() ); + Position tuPos = tu.Y(); + tuPos.relativeTo(cu.Y()); + const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size())); const Slice &slice = *cs.slice; - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj()) + if (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && !(CS::isDualITree(cs) && slice.isIntra() && tu.cu->predMode==MODE_IBC )) { const CompArea &areaY = tu.blocks[COMPONENT_Y]; - PelBuf piPredY = cs.getPredBuf(areaY); - CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); - PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); - tmpPred.copyFrom(piPredY); - if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) - tmpPred.rspSignal(m_pcReshape->getFwdLUT()); - const Pel avgLuma = tmpPred.computeAvg(); - int adj = m_pcReshape->calculateChromaAdj(avgLuma); + int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY); tu.setChromaAdj(adj); } @@ -6111,7 +6514,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par saveCS.picture = cs.picture; saveCS.area.repositionTo(currArea); saveCS.clearTUs(); - TransformUnit & bestTU = saveCS.addTU(CS::isDualITree(cs) ? 
cu : currArea, partitioner.chType); + TransformUnit & bestTU = saveCS.addTU(CS::getArea(cs, currArea, partitioner.chType), partitioner.chType); for( uint32_t c = 0; c < numTBlocks; c++ ) { @@ -6140,8 +6543,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par preCalcAlpha = xCalcCrossComponentPredictionAlpha( tu, compID, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() ); } - const bool tsAllowed = TU::isTSAllowed ( tu, compID ); - const bool mtsAllowed = TU::isMTSAllowed( tu, compID ); + const bool tsAllowed = TU::isTSAllowed(tu, compID) && (isLuma(compID) || (isChroma(compID) && m_pcEncCfg->getUseChromaTS())); + const bool mtsAllowed = CU::isMTSAllowed( *tu.cu, compID ); + uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests std::vector<TrMode> trModes; trModes.push_back( TrMode( 0, true ) ); //DCT2 @@ -6174,6 +6578,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #endif } } + + if (colorTransFlag) + { + m_pcTrQuant->lambdaAdjustColorTrans(true); + m_pcRdCost->lambdaAdjustColorTrans(true, compID); + } + const int crossCPredictionModesToTest = preCalcAlpha != 0 ? 
2 : 1; const int numTransformCandidates = nNumTransformCands; const bool isOneMode = crossCPredictionModesToTest == 1 && numTransformCandidates == 1; @@ -6193,9 +6604,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); - if( isLuma( compID ) ) { - if( bestTU.mtsIdx == 1 && m_pcEncCfg->getUseTransformSkipFast() ) +#if JVET_AHG14_LOSSLESS + if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) ) + { +#endif + if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->getUseTransformSkipFast()) { continue; } @@ -6203,20 +6617,37 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { continue; } - tu.mtsIdx = trModes[transformMode].first; +#if JVET_AHG14_LOSSLESS + } +#endif + tu.mtsIdx[compID] = trModes[transformMode].first; } tu.compAlpha[compID] = bUseCrossCPrediction ? preCalcAlpha : 0; - const QpParam cQP(tu, compID); // note: uses tu.transformSkip[compID] + QpParam cQP(tu, compID); // note: uses tu.transformSkip[compID] + if (colorTransFlag) + { + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? 
DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6; + cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6; + } + } #if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda(compID); #endif - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj()) + if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()) { - double cRescale = round((double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj())); + double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj()); m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale)); } + if ( sps.getJointCbCrEnabledFlag() && isChroma( compID ) && ( tu.cu->cs->slice->getSliceQp() > 18 ) ) + { + m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() ); + } + TCoeff currAbsSum = 0; uint64_t currCompFracBits = 0; Distortion currCompDist = 0; @@ -6230,7 +6661,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par PelBuf resiBuf = csFull->getResiBuf( compArea ); crossComponentPrediction( tu, compID, lumaResi, resiBuf, resiBuf, false ); } - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 ) + if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && tu.blocks[compID].width*tu.blocks[compID].height > 4) { PelBuf resiBuf = csFull->getResiBuf(compArea); resiBuf.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(compID)); @@ -6239,10 +6670,17 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { if( transformMode == 0 ) { - m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, CU::isIntra( *tu.cu ) ? 
m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand() ); - tu.mtsIdx = trModes[0].first; + m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, m_pcEncCfg->getMTSInterMaxCand() ); + tu.mtsIdx[compID] = trModes[0].first; + } +#if JVET_AHG14_LOSSLESS + if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) ) + { + m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true ); } +#else m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true ); +#endif } else { @@ -6284,7 +6722,14 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par } else #endif - nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist); + if (cs.slice->getSPS()->getUseColorTrans()) + { + nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false); + } + else + { + nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist); + } } if ((puiZeroDist != NULL) && isFirstMode) @@ -6292,6 +6737,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion } +#if JVET_AHG14_LOSSLESS + if( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) + { + currAbsSum = 0; + } +#endif + if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction { if (isFirstMode) @@ -6302,6 +6754,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false ); m_CABACEstimator->cbf_comp( *csFull, true, compArea, currDepth, prevCbf ); + if( compID == COMPONENT_Cr ) + { + const int cbfMask = ( tu.cbf[COMPONENT_Cb] ? 
2 : 0 ) + 1; + m_CABACEstimator->joint_cb_cr( tu, cbfMask ); + } if( isCrossCPredictionAvailable ) { @@ -6315,7 +6772,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par CPelBuf orgResiBuf = csFull->getOrgResiBuf(compArea); m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP); - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 ) + if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && tu.blocks[compID].width*tu.blocks[compID].height > 4) { resiBuf.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); } @@ -6332,11 +6789,6 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #else currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist); #endif - - if (csFull->isLossless) - { - nonCoeffCost = MAX_DOUBLE; - } } else if( transformMode > 0 && !bUseCrossCPrediction ) { @@ -6369,6 +6821,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par uiAbsSum[compID] = currAbsSum; uiSingleDistComp[compID] = currCompDist; + uiSingleFracBits[compID] = currCompFracBits; minCost[compID] = currCompCost; if (uiAbsSum[compID] == 0) @@ -6382,7 +6835,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par } } - if( !isLastMode ) + if( !isLastMode || (compID != COMPONENT_Y && !tu.noResidual) ) { bestTU.copyComponentFrom( tu, compID ); saveCS.getResiBuf( compArea ).copyFrom( csFull->getResiBuf( compArea ) ); @@ -6403,8 +6856,227 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par tu.copyComponentFrom( bestTU, compID ); csFull->getResiBuf( compArea ).copyFrom( saveCS.getResiBuf( compArea ) ); } + + if (colorTransFlag) + { + m_pcTrQuant->lambdaAdjustColorTrans(false); + 
m_pcRdCost->lambdaAdjustColorTrans(false, compID); + } + } // component loop + if (colorTransFlag) + { + PelUnitBuf orgResidual = orgResi->subBuf(relativeUnitArea); + PelUnitBuf invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea); + csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false); + + for (uint32_t c = 0; c < numTBlocks; c++) + { + const ComponentID compID = (ComponentID)c; + uiSingleDistComp[c] = m_pcRdCost->getDistPart(orgResidual.bufs[c], invColorTransResidual.bufs[c], sps.getBitDepth(toChannelType(compID)), compID, DF_SSE); + minCost[c] = m_pcRdCost->calcRdCost(uiSingleFracBits[c], uiSingleDistComp[c]); + } + } + + if ( chroma && tu.blocks[COMPONENT_Cb].valid() ) + { + const CompArea& cbArea = tu.blocks[COMPONENT_Cb]; + const CompArea& crArea = tu.blocks[COMPONENT_Cr]; + bool checkJointCbCr = (sps.getJointCbCrEnabledFlag()) && (!tu.noResidual) && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr)); + const int channelBitDepth = sps.getBitDepth(toChannelType(COMPONENT_Cb)); + bool reshape = slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() + && tu.blocks[COMPONENT_Cb].width * tu.blocks[COMPONENT_Cb].height > 4; + double minCostCbCr = minCost[COMPONENT_Cb] + minCost[COMPONENT_Cr]; + if (colorTransFlag) + { + minCostCbCr += minCost[COMPONENT_Y]; // ACT should consider three-component cost + } + bool isLastBest = false; + + CompStorage orgResiCb[4], orgResiCr[4]; // 0:std, 1-3:jointCbCr + std::vector<int> jointCbfMasksToTest; + if ( checkJointCbCr ) + { + orgResiCb[0].create(cbArea); + orgResiCr[0].create(crArea); + orgResiCb[0].copyFrom(cs.getOrgResiBuf(cbArea)); + orgResiCr[0].copyFrom(cs.getOrgResiBuf(crArea)); + if (reshape) + { + orgResiCb[0].scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(COMPONENT_Cb)); + orgResiCr[0].scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(COMPONENT_Cr)); + } + jointCbfMasksToTest = 
m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr); + } + + for (int cbfMask: jointCbfMasksToTest) + { + TCoeff currAbsSum = 0; + uint64_t currCompFracBits = 0; + Distortion currCompDistCb = 0; + Distortion currCompDistCr = 0; + double currCompCost = 0; + + tu.jointCbCr = (uint8_t) cbfMask; + tu.compAlpha[COMPONENT_Cb] = tu.compAlpha[COMPONENT_Cr] = 0; + // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode. + tu.mtsIdx[COMPONENT_Cb] = tu.mtsIdx[COMPONENT_Cr] = MTS_DCT2_DCT2; + int codedCbfMask = 0; + ComponentID codeCompId = (tu.jointCbCr >> 1 ? COMPONENT_Cb : COMPONENT_Cr); + ComponentID otherCompId = (codeCompId == COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr); + + if (colorTransFlag) + { + m_pcTrQuant->lambdaAdjustColorTrans(true); + m_pcTrQuant->selectLambda(codeCompId); + } + else + { + m_pcTrQuant->selectLambda(codeCompId); + } + // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks + const int absIct = abs( TU::getICTMode(tu) ); + const double lfact = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 ); + m_pcTrQuant->setLambda( lfact * m_pcTrQuant->getLambda() ); + if ( checkJointCbCr && (tu.cu->cs->slice->getSliceQp() > 18)) + { + m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() ); + } + + m_CABACEstimator->getCtx() = ctxStart; + m_CABACEstimator->resetBits(); + + PelBuf cbResi = csFull->getResiBuf(cbArea); + PelBuf crResi = csFull->getResiBuf(crArea); + cbResi.copyFrom(orgResiCb[cbfMask]); + crResi.copyFrom(orgResiCr[cbfMask]); + + if ( reshape ) + { + double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj()); + m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale)); + } + + Distortion currCompDistY = MAX_UINT64; + QpParam qpCbCr(tu, codeCompId); + if (colorTransFlag) + { + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + qpCbCr.Qps[qpIdx] = qpCbCr.Qps[qpIdx] + (codeCompId == COMPONENT_Cr ? 
DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + qpCbCr.pers[qpIdx] = qpCbCr.Qps[qpIdx] / 6; + qpCbCr.rems[qpIdx] = qpCbCr.Qps[qpIdx] % 6; + } + } + + tu.getCoeffs(otherCompId).fill(0); // do we need that? + TU::setCbfAtDepth(tu, otherCompId, tu.depth, false); + + PelBuf &codeResi = (codeCompId == COMPONENT_Cr ? crResi : cbResi); + TCoeff compAbsSum = 0; + m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx()); + if (compAbsSum > 0) + { + m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr); + codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1); + } + else + { + codeResi.fill(0); + } + + if (tu.jointCbCr == 3 && codedCbfMask == 2) + { + codedCbfMask = 3; + TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true); + } + if (codedCbfMask && tu.jointCbCr != codedCbfMask) + { + codedCbfMask = 0; + } + currAbsSum = codedCbfMask; + + if (currAbsSum > 0) + { + m_CABACEstimator->cbf_comp(cs, codedCbfMask >> 1, cbArea, currDepth, false); + m_CABACEstimator->cbf_comp(cs, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1); + m_CABACEstimator->joint_cb_cr(tu, codedCbfMask); + if (codedCbfMask >> 1) + m_CABACEstimator->residual_coding(tu, COMPONENT_Cb); + if (codedCbfMask & 1) + m_CABACEstimator->residual_coding(tu, COMPONENT_Cr); + currCompFracBits = m_CABACEstimator->getEstFracBits(); + + m_pcTrQuant->invTransformICT(tu, cbResi, crResi); + if (reshape) + { + cbResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb)); + crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr)); + } + + if (colorTransFlag) + { + PelUnitBuf orgResidual = orgResi->subBuf(relativeUnitArea); + PelUnitBuf invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea); + csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false); + + currCompDistY = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Y], invColorTransResidual.bufs[COMPONENT_Y], sps.getBitDepth(toChannelType(COMPONENT_Y)), 
COMPONENT_Y, DF_SSE); + currCompDistCb = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cb], invColorTransResidual.bufs[COMPONENT_Cb], sps.getBitDepth(toChannelType(COMPONENT_Cb)), COMPONENT_Cb, DF_SSE); + currCompDistCr = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cr], invColorTransResidual.bufs[COMPONENT_Cr], sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE); + currCompCost = m_pcRdCost->calcRdCost(uiSingleFracBits[COMPONENT_Y] + currCompFracBits, currCompDistY + currCompDistCr + currCompDistCb, false); + } + else + { + currCompDistCb = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(cbArea), cbResi, channelBitDepth, COMPONENT_Cb, DF_SSE); + currCompDistCr = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(crArea), crResi, channelBitDepth, COMPONENT_Cr, DF_SSE); +#if WCG_EXT + currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false); +#else + currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb); +#endif + } + } + else + currCompCost = MAX_DOUBLE; + + // evaluate + if( currCompCost < minCostCbCr ) + { + uiAbsSum[COMPONENT_Cb] = currAbsSum; + uiAbsSum[COMPONENT_Cr] = currAbsSum; + uiSingleDistComp[COMPONENT_Cb] = currCompDistCb; + uiSingleDistComp[COMPONENT_Cr] = currCompDistCr; + if (colorTransFlag) + { + uiSingleDistComp[COMPONENT_Y] = currCompDistY; + } + minCostCbCr = currCompCost; + isLastBest = (cbfMask == jointCbfMasksToTest.back()); + if (!isLastBest) + { + bestTU.copyComponentFrom(tu, COMPONENT_Cb); + bestTU.copyComponentFrom(tu, COMPONENT_Cr); + saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea)); + saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea)); + } + } + + if( !isLastBest ) + { + // copy component + tu.copyComponentFrom( bestTU, COMPONENT_Cb ); + tu.copyComponentFrom( bestTU, COMPONENT_Cr ); + csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) ); + csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea 
) ); + } + if (colorTransFlag) + { + m_pcTrQuant->lambdaAdjustColorTrans(false); + } + } + } + m_CABACEstimator->getCtx() = ctxStart; m_CABACEstimator->resetBits(); if( !tu.noResidual ) @@ -6434,6 +7106,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par continue; if (tu.blocks[compID].valid()) { + if( compID == COMPONENT_Cr ) + { + const int cbfMask = ( TU::getCbf( tu, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( tu, COMPONENT_Cr ) ? 1 : 0 ); + m_CABACEstimator->joint_cb_cr(tu, cbfMask); + } if( cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma(compID) && uiAbsSum[COMPONENT_Y] ) { m_CABACEstimator->cross_comp_pred( tu, compID ); @@ -6485,6 +7162,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist , luma, chroma + , orgResi ); csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist ); @@ -6563,22 +7241,33 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa , const bool luma, const bool chroma ) { + m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc()); + CodingUnit &cu = *cs.getCU( partitioner.chType ); + if( cu.predMode == MODE_INTER ) + CHECK( cu.isSepTree(), "CU with Inter mode must be in single tree" ); const ChromaFormat format = cs.area.chromaFormat;; const int numValidComponents = getNumberValidComponents(format); const SPS &sps = *cs.sps; - const PPS &pps = *cs.pps; + + bool colorTransAllowed = cs.slice->getSPS()->getUseColorTrans() && luma && chroma; + if (cs.slice->getSPS()->getUseColorTrans()) + { + CHECK(cu.treeType != TREE_D || partitioner.treeType != TREE_D, "localtree should not be applied when adaptive color transform is enabled"); + CHECK(cu.modeType != MODE_TYPE_ALL || partitioner.modeType != MODE_TYPE_ALL, "localtree should not be applied when adaptive color transform is enabled"); + } if( skipResidual ) // No 
residual coding : SKIP mode { cu.skip = true; cu.rootCbf = false; + cu.colorTransform = false; CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" ); cs.getResiBuf().fill(0); { cs.getRecoBuf().copyFrom(cs.getPredBuf() ); - if (m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) && !cu.firstPU->ciipFlag && !CU::isIBC(cu)) { cs.getRecoBuf().Y().rspSignal(m_pcReshape->getFwdLUT()); } @@ -6586,7 +7275,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa // add an empty TU - cs.addTU(CS::isDualITree(cs) ? cu : cs.area, partitioner.chType); + cs.addTU(CS::getArea(cs, cs.area, partitioner.chType), partitioner.chType); Distortion distortion = 0; for (int comp = 0; comp < numValidComponents; comp++) @@ -6600,7 +7289,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa CPelBuf org = cs.getOrgBuf (compID); #if WCG_EXT if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( - m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper()&& m_pcReshape->getCTUFlag()))) + m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) ) { const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] ); if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) @@ -6622,29 +7311,10 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa m_CABACEstimator->resetBits(); - if( pps.getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } - PredictionUnit &pu = *cs.getPU( partitioner.chType ); m_CABACEstimator->cu_skip_flag ( cu ); - if (CU::isIBC(cu)) - { - m_CABACEstimator->merge_idx(pu); - } - else - { - m_CABACEstimator->subblock_merge_flag( cu ); - 
m_CABACEstimator->triangle_mode ( cu ); - if (cu.mmvdSkip) - { - m_CABACEstimator->mmvd_merge_idx(pu); - } - else - m_CABACEstimator->merge_idx ( pu ); - } + m_CABACEstimator->merge_data(pu); cs.dist = distortion; cs.fracBits = m_CABACEstimator->getEstFracBits(); @@ -6657,14 +7327,14 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa if (luma) { cs.getResiBuf().bufs[0].copyFrom(cs.getOrgBuf().bufs[0]); - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { const CompArea &areaY = cu.Y(); CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y)); - if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (!cu.firstPU->ciipFlag && !CU::isIBC(cu)) tmpPred.rspSignal(m_pcReshape->getFwdLUT()); cs.getResiBuf(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT()); cs.getResiBuf(COMPONENT_Y).subtract(tmpPred); @@ -6679,20 +7349,133 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa cs.getResiBuf().bufs[1].subtract(cs.getPredBuf().bufs[1]); cs.getResiBuf().bufs[2].subtract(cs.getPredBuf().bufs[2]); } - Distortion zeroDistortion = 0; + const UnitArea curUnitArea = partitioner.currArea(); + CodingStructure &saveCS = *m_pSaveCS[1]; + saveCS.pcv = cs.pcv; + saveCS.picture = cs.picture; + saveCS.area.repositionTo(curUnitArea); + saveCS.clearCUs(); + saveCS.clearPUs(); + saveCS.clearTUs(); + for (const auto &ppcu : cs.cus) + { + CodingUnit &pcu = saveCS.addCU(*ppcu, ppcu->chType); + pcu = *ppcu; + } + for (const auto &ppu : cs.pus) + { + PredictionUnit &pu = saveCS.addPU(*ppu, ppu->chType); + pu = *ppu; + } + + PelUnitBuf orgResidual, colorTransResidual; + const UnitArea localUnitArea(cs.area.chromaFormat, Area(0, 0, cu.Y().width, cu.Y().height)); + orgResidual = m_colorTransResiBuf[0].getBuf(localUnitArea); 
+ colorTransResidual = m_colorTransResiBuf[1].getBuf(localUnitArea); + orgResidual.copyFrom(cs.getResiBuf()); + if (colorTransAllowed) + { + cs.getResiBuf().colorSpaceConvert(colorTransResidual, true); + } + + const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); + int numAllowedColorSpace = (colorTransAllowed ? 2 : 1); + Distortion zeroDistortion = 0; - const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() ); + double bestCost = MAX_DOUBLE; + bool bestColorTrans = false; + bool bestRootCbf = false; + uint8_t bestsbtInfo = 0; + uint8_t orgSbtInfo = cu.sbtInfo; + int bestIter = 0; + auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl); + bool rootCbfFirstColorSpace = true; + + for (int iter = 0; iter < numAllowedColorSpace; iter++) + { + if (colorTransAllowed && !m_pcEncCfg->getRGBFormatFlag() && iter) + { + continue; + } + char colorSpaceOption = blkCache->getSelectColorSpaceOption(cu); + if (colorTransAllowed) + { + if (colorSpaceOption) + { + CHECK(colorSpaceOption > 2 || colorSpaceOption < 0, "invalid color space selection option"); + if (colorSpaceOption == 1 && iter) + { + continue; + } + if (colorSpaceOption == 2 && !iter) + { + continue; + } + } + } + if (!colorSpaceOption) + { + if (iter && !rootCbfFirstColorSpace) + { + continue; + } + if (colorTransAllowed && cs.bestParent && cs.bestParent->tmpColorSpaceCost != MAX_DOUBLE) + { + if (cs.bestParent->firstColorSpaceSelected && iter) + { + continue; + } + if (m_pcEncCfg->getRGBFormatFlag()) + { + if (!cs.bestParent->firstColorSpaceSelected && !iter) + { + continue; + } + } + } + } + bool colorTransFlag = (colorTransAllowed && m_pcEncCfg->getRGBFormatFlag()) ? 
(1 - iter) : iter; + cu.colorTransform = colorTransFlag; + cu.sbtInfo = orgSbtInfo; + + m_CABACEstimator->resetBits(); + m_CABACEstimator->getCtx() = ctxStart; + cs.clearTUs(); + cs.fracBits = 0; + cs.dist = 0; + cs.cost = 0; + + if (colorTransFlag) + { + cs.getOrgResiBuf().bufs[0].copyFrom(colorTransResidual.bufs[0]); + cs.getOrgResiBuf().bufs[1].copyFrom(colorTransResidual.bufs[1]); + cs.getOrgResiBuf().bufs[2].copyFrom(colorTransResidual.bufs[2]); + + memset(m_pTempPel, 0, sizeof(Pel) * localUnitArea.blocks[0].area()); + zeroDistortion = 0; + for (int compIdx = 0; compIdx < 3; compIdx++) + { + ComponentID componentID = (ComponentID)compIdx; + const CPelBuf zeroBuf(m_pTempPel, localUnitArea.blocks[compIdx]); + zeroDistortion += m_pcRdCost->getDistPart(zeroBuf, orgResidual.bufs[compIdx], sps.getBitDepth(toChannelType(componentID)), componentID, DF_SSE); + } + xEstimateInterResidualQT(cs, partitioner, NULL, luma, chroma, &orgResidual); + } + else + { + zeroDistortion = 0; if (luma) { - cs.getOrgResiBuf().bufs[0].copyFrom(cs.getResiBuf().bufs[0]); + cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]); } if (chroma) { - cs.getOrgResiBuf().bufs[1].copyFrom(cs.getResiBuf().bufs[1]); - cs.getOrgResiBuf().bufs[2].copyFrom(cs.getResiBuf().bufs[2]); + cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]); + cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]); } xEstimateInterResidualQT(cs, partitioner, &zeroDistortion, luma, chroma); + } TransformUnit &firstTU = *cs.getTU( partitioner.chType ); cu.rootCbf = false; @@ -6704,11 +7487,11 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa #if WCG_EXT if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() ) { - zeroCost = cs.isLossless ? ( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false ); + zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false ); } else #endif - zeroCost = cs.isLossless ? 
( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion ); + zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion ); } const int numValidTBlocks = ::getNumberValidTBlocks( *cs.pcv ); @@ -6723,6 +7506,8 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa if (zeroCost < cs.cost || !cu.rootCbf) { + cs.cost = zeroCost; + cu.colorTransform = false; cu.sbtInfo = 0; cu.rootCbf = false; @@ -6737,7 +7522,50 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa } cu.firstTU = cu.lastTU = &tu; } + if (!iter) + { + rootCbfFirstColorSpace = cu.rootCbf; + } + if (cs.cost < bestCost) + { + bestIter = iter; + if (cu.rootCbf && cu.colorTransform) + { + cs.getResiBuf(curUnitArea).colorSpaceConvert(cs.getResiBuf(curUnitArea), false); + } + + if (iter != (numAllowedColorSpace - 1)) + { + bestCost = cs.cost; + bestColorTrans = cu.colorTransform; + bestRootCbf = cu.rootCbf; + bestsbtInfo = cu.sbtInfo; + saveCS.clearTUs(); + for (const auto &ptu : cs.tus) + { + TransformUnit &tu = saveCS.addTU(*ptu, ptu->chType); + tu = *ptu; + } + saveCS.getResiBuf(curUnitArea).copyFrom(cs.getResiBuf(curUnitArea)); + } + } + } + + if (bestIter != (numAllowedColorSpace - 1)) + { + cu.colorTransform = bestColorTrans; + cu.rootCbf = bestRootCbf; + cu.sbtInfo = bestsbtInfo; + + cs.clearTUs(); + for (const auto &ptu : saveCS.tus) + { + TransformUnit &tu = cs.addTU(*ptu, ptu->chType); + tu = *ptu; + } + cs.getResiBuf(curUnitArea).copyFrom(saveCS.getResiBuf(curUnitArea)); + } // all decisions now made. 
Fully encode the CU, including the headers: m_CABACEstimator->getCtx() = ctxStart; @@ -6759,14 +7587,14 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa if (luma) { - if (cu.rootCbf && cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) + if (cu.rootCbf && cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { const CompArea &areaY = cu.Y(); CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y)); - if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (!cu.firstPU->ciipFlag && !CU::isIBC(cu)) tmpPred.rspSignal(m_pcReshape->getFwdLUT()); cs.getRecoBuf(COMPONENT_Y).reconstruct(tmpPred, cs.getResiBuf(COMPONENT_Y), cs.slice->clpRng(COMPONENT_Y)); @@ -6774,7 +7602,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa else { cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs().comp[0]); - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu)) + if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && !cu.firstPU->ciipFlag && !CU::isIBC(cu)) { cs.getRecoBuf().bufs[0].rspSignal(m_pcReshape->getFwdLUT()); } @@ -6801,7 +7629,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa #if WCG_EXT if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( - m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ) ) ) + m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) { const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] ); if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) ) @@ -6826,6 +7654,21 @@ void 
InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa cs.dist = finalDistortion; cs.fracBits = finalFracBits; cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist); + if (cs.slice->getSPS()->getUseColorTrans()) + { + if (cs.cost < cs.tmpColorSpaceCost) + { + cs.tmpColorSpaceCost = cs.cost; + if (m_pcEncCfg->getRGBFormatFlag()) + { + cs.firstColorSpaceSelected = cu.colorTransform || !cu.rootCbf; + } + else + { + cs.firstColorSpaceSelected = !cu.colorTransform || !cu.rootCbf; + } + } + } CHECK(cs.tus.size() == 0, "No TUs present"); } @@ -6840,31 +7683,22 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner & if( cu.firstPU->mergeFlag && !cu.rootCbf ) { cu.skip = true; - - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } - + CHECK(cu.colorTransform, "ACT should not be enabled for skip mode"); m_CABACEstimator->cu_skip_flag ( cu ); - m_CABACEstimator->subblock_merge_flag( cu ); - m_CABACEstimator->triangle_mode ( cu ); - if (cu.mmvdSkip) + if (cu.firstPU->ciipFlag) { - m_CABACEstimator->mmvd_merge_idx(*cu.firstPU); + // CIIP shouldn't be skip, the upper level function will deal with it, i.e. setting the overall cost to MAX_DOUBLE } else - m_CABACEstimator->merge_idx ( *cu.firstPU ); + { + m_CABACEstimator->merge_data(*cu.firstPU); + } fracBits += m_CABACEstimator->getEstFracBits(); } else { CHECK( cu.skip, "Skip flag has to be off at this point!" 
); - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } if (cu.Y().valid()) m_CABACEstimator->cu_skip_flag( cu ); m_CABACEstimator->pred_mode ( cu ); @@ -6879,11 +7713,11 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner & return fracBits; } -double InterSearch::xGetMEDistortionWeight(uint8_t gbiIdx, RefPicList eRefPicList) +double InterSearch::xGetMEDistortionWeight(uint8_t bcwIdx, RefPicList eRefPicList) { - if( gbiIdx != GBI_DEFAULT ) + if( bcwIdx != BCW_DEFAULT ) { - return fabs((double)getGbiWeight(gbiIdx, eRefPicList) / (double)g_GbiWeightBase); + return fabs((double)getBcwWeight(bcwIdx, eRefPicList) / (double)g_BcwWeightBase); } else { @@ -6896,13 +7730,16 @@ bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, { m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx); - m_pcRdCost->setPredictor(pcMvPred); + Mv pred = pcMvPred; + pred.changeTransPrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor(pred); m_pcRdCost->setCostScale(0); - unsigned imvShift = pu.cu->imv << 1; - uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), imvShift); + Mv mv = rcMv; + mv.changeTransPrecInternal2Amvr(pu.cu->imv); + uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0); - ruiBits += uiMvBits; + ruiBits += mvBits; ruiCost += m_pcRdCost->getCost(ruiBits); return true; } @@ -6915,28 +7752,23 @@ bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPi { if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType)) { - m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType - , mvpIdx - ); + m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType, mvpIdx); m_pcRdCost->setCostScale(0); acMvPred[0] = 
aamvpi.mvCandLT[mvpIdx]; acMvPred[1] = aamvpi.mvCandRT[mvpIdx]; acMvPred[2] = aamvpi.mvCandLB[mvpIdx]; - uint32_t uiMvBits = 0; - for (int iVerIdx = 0; iVerIdx<(pu.cu->affineType ? 3 : 2); iVerIdx++) + uint32_t mvBits = 0; + for (int verIdx = 0; verIdx<(pu.cu->affineType ? 3 : 2); verIdx++) { - if (iVerIdx) - { - m_pcRdCost->setPredictor(acMvPred[iVerIdx] + acMv[0] - acMvPred[0]); - } - else - { - m_pcRdCost->setPredictor(acMvPred[iVerIdx]); - } - uiMvBits += m_pcRdCost->getBitsOfVectorWithPredictor(acMv[iVerIdx].getHor(), acMv[iVerIdx].getVer(), 0); + Mv pred = verIdx ? acMvPred[verIdx] + acMv[0] - acMvPred[0] : acMvPred[verIdx]; + pred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + m_pcRdCost->setPredictor(pred); + Mv mv = acMv[verIdx]; + mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER); + mvBits += m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0); } - ruiBits += uiMvBits; + ruiBits += mvBits; ruiCost += m_pcRdCost->getCost(ruiBits); return true; } @@ -6944,25 +7776,25 @@ bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPi } void InterSearch::initWeightIdxBits() { - for (int n = 0; n < GBI_NUM; ++n) + for (int n = 0; n < BCW_NUM; ++n) { m_estWeightIdxBits[n] = deriveWeightIdxBits(n); } } -void InterSearch::xClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps ) +void InterSearch::xClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps, const PPS& pps ) { int mvShift = MV_FRACTIONAL_BITS_INTERNAL; int offset = 8; - int horMax = ( sps.getPicWidthInLumaSamples() + offset - ( int ) pos.x - 1 ) << mvShift; + int horMax = ( pps.getPicWidthInLumaSamples() + offset - (int)pos.x - 1 ) << mvShift; int horMin = ( -( int ) sps.getMaxCUWidth() - offset - ( int ) pos.x + 1 ) << mvShift; - int verMax = ( sps.getPicHeightInLumaSamples() + offset - ( int ) pos.y - 1 ) << mvShift; + int verMax = ( pps.getPicHeightInLumaSamples() + offset - (int)pos.y 
- 1 ) << mvShift; int verMin = ( -( int ) sps.getMaxCUHeight() - offset - ( int ) pos.y + 1 ) << mvShift; if( sps.getWrapAroundEnabledFlag() ) { - int horMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + offset - ( int ) pos.x - 1 ) << mvShift; + int horMax = ( pps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + offset - (int)pos.x - 1 ) << mvShift; int horMin = ( -( int ) sps.getMaxCUWidth() - offset - ( int ) pos.x + 1 ) << mvShift; rcMv.setHor( std::min( horMax, std::max( horMin, rcMv.getHor() ) ) ); rcMv.setVer( std::min( verMax, std::max( verMin, rcMv.getVer() ) ) ); @@ -6981,7 +7813,7 @@ uint32_t InterSearch::xDetermineBestMvp( PredictionUnit& pu, Mv acMvTemp[3], int { Mv mvPred[3] = { aamvpi.mvCandLT[i], aamvpi.mvCandRT[i], aamvpi.mvCandLB[i] }; uint32_t candBits = m_auiMVPIdxCost[i][aamvpi.numCand]; - candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred, pu.cu->imv != 1 ); + candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred ); if ( candBits < minBits ) { @@ -7000,7 +7832,7 @@ void InterSearch::symmvdCheckBestMvp( Mv curMv, RefPicList curRefList, AMVPInfo amvpInfo[2][33], - int32_t gbiIdx, + int32_t bcwIdx, Mv cMvPredSym[2], int32_t mvpIdxSym[2], Distortion& bestCost, @@ -7022,9 +7854,23 @@ void InterSearch::symmvdCheckBestMvp( PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); const Picture* picRefA = pu.cu->slice->getRefPic(curRefList, cCurMvField.refIdx); Mv mvA = cCurMvField.mv; - mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv(mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); - xPredInterBlk(COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); + clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 ) + { + Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 ); + CPelBuf pelBufA = 
picRefA->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false ); + predBufA.bufs[0].buf = const_cast<Pel *>(pelBufA.buf); + predBufA.bufs[0].stride = pelBufA.stride; + } + else + { + xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + } + PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) ); + bufTmp.copyFrom( origBuf ); + bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, tarRefList ) ); + + double fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, tarRefList ); int32_t skipMvpIdx[2]; skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1; @@ -7043,21 +7889,28 @@ void InterSearch::symmvdCheckBestMvp( PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getBuf(UnitAreaRelative(*pu.cu, pu)); const Picture* picRefB = pu.cu->slice->getRefPic(tarRefList, cTarMvField.refIdx); Mv mvB = cTarMvField.mv; - mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL); - clipMv(mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps); - xPredInterBlk(COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false); - - PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf(UnitAreaRelative(*pu.cu, pu)); - if (gbiIdx != GBI_DEFAULT) - bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx); + clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps ); + if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 ) + { + Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 ); + CPelBuf pelBufB = picRefB->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false ); + predBufB.bufs[0].buf = const_cast<Pel *>(pelBufB.buf); + predBufB.bufs[0].stride = pelBufB.stride; + } else - bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), 
pu.cu->slice->clpRng(COMPONENT_Y)); - + { + xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false ); + } // calc distortion - Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD); - - m_pcRdCost->setPredictor(amvpCur.mvCand[i]); - uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(curMv.hor, curMv.ver, (pu.cu->imv << 1)); + DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD; + Distortion cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) ); + + Mv pred = amvpCur.mvCand[i]; + pred.changeTransPrecInternal2Amvr(pu.cu->imv); + m_pcRdCost->setPredictor(pred); + Mv mv = curMv; + mv.changeTransPrecInternal2Amvr(pu.cu->imv); + uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0); bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS]; bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS]; cost += m_pcRdCost->getCost(bits); @@ -7072,3 +7925,116 @@ void InterSearch::symmvdCheckBestMvp( } } } + +uint64_t InterSearch::xCalcPuMeBits(PredictionUnit& pu) +{ + assert(pu.mergeFlag); + assert(!CU::isIBC(*pu.cu)); + m_CABACEstimator->resetBits(); + m_CABACEstimator->merge_flag(pu); + if (pu.mergeFlag) + { + m_CABACEstimator->merge_data(pu); + } + return m_CABACEstimator->getEstFracBits(); +} + +bool InterSearch::searchBv(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize) +{ + const int ctuSizeLog2 = floorLog2(ctuSize); + + int refRightX = xPos + xBv + width - 1; + int refBottomY = yPos + yBv + height - 1; + + int refLeftX = xPos + xBv; + int refTopY = yPos + yBv; + + if ((xPos + xBv) < 0) + { + return false; + } + if (refRightX >= picWidth) + { + return false; + } + + if ((yPos + yBv) < 0) + { + return false; + } + if (refBottomY 
>= picHeight) + { + return false; + } + if ((xBv + width) > 0 && (yBv + height) > 0) + { + return false; + } + + // Don't search the above CTU row + if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2) + return false; + + // Don't search the below CTU row + if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2) + { + return false; + } + + unsigned curTileIdx = pu.cs->pps->getTileIdx(pu.lumaPos()); + unsigned refTileIdx = pu.cs->pps->getTileIdx(Position(refLeftX, refTopY)); + if (curTileIdx != refTileIdx) + { + return false; + } + refTileIdx = pu.cs->pps->getTileIdx(Position(refLeftX, refBottomY)); + if (curTileIdx != refTileIdx) + { + return false; + } + refTileIdx = pu.cs->pps->getTileIdx(Position(refRightX, refTopY)); + if (curTileIdx != refTileIdx) + { + return false; + } + refTileIdx = pu.cs->pps->getTileIdx(Position(refRightX, refBottomY)); + if (curTileIdx != refTileIdx) + { + return false; + } + + // in the same CTU line + int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0); + if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2) - numLeftCTUs)) + { + + // in the same CTU, or left CTU + // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer + if (((refLeftX >> ctuSizeLog2) == ((xPos >> ctuSizeLog2) - 1)) && (ctuSizeLog2 == 7)) + { + // ref block's collocated block in current CTU + const Position refPosCol = pu.Y().topLeft().offset(xBv + ctuSize, yBv); + int offset64x = (refPosCol.x >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); + int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1); + const Position refPosCol64x64 = {offset64x, offset64y}; + if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y))) + return false; + if (refPosCol64x64 == pu.Y().topLeft()) + return false; + } + } + else + return false; + + // in the same CTU, or valid area from left CTU. 
Check if the reference block is already coded + const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv); + const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv); + const ChannelType chType = toChannelType(COMPONENT_Y); + if (!pu.cs->isDecomp(refPosBR, chType)) + return false; + if (!pu.cs->isDecomp(refPosLT, chType)) + return false; + return true; +} + +//! \} diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index d2a4bccb923c67ab42365efd82543acc3aa8f095..27430d67c7660e084b8eec83208886117cfc6a41 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -78,9 +78,15 @@ struct AffineMVInfo int x, y, w, h; }; +struct BlkUniMvInfo +{ + Mv uniMvs[2][33]; + int x, y, w, h; +}; + typedef struct { - Mv acMvAffine4Para[2][2]; + Mv acMvAffine4Para[2][3]; Mv acMvAffine6Para[2][3]; int16_t affine4ParaRefIdx[2]; int16_t affine6ParaRefIdx[2]; @@ -108,16 +114,21 @@ private: CodingStructure **m_pSaveCS; ClpRng m_lumaClpRng; - uint32_t m_estWeightIdxBits[GBI_NUM]; - GBiMotionParam m_uniMotions; + uint32_t m_estWeightIdxBits[BCW_NUM]; + BcwMotionParam m_uniMotions; bool m_affineModeSelected; std::unordered_map< Position, std::unordered_map< Size, BlkRecord> > m_ctuRecord; AffineMVInfo *m_affMVList; int m_affMVListIdx; int m_affMVListSize; int m_affMVListMaxSize; + BlkUniMvInfo* m_uniMvList; + int m_uniMvListIdx; + int m_uniMvListSize; + int m_uniMvListMaxSize; Distortion m_hevcCost; EncAffineMotion m_affineMotion; + PatentBvCand m_defaultCachedBvs; protected: // interface to option EncCfg* m_pcEncCfg; @@ -140,6 +151,8 @@ protected: RefPicList m_currRefPicList; int m_currRefPicIndex; bool m_skipFracME; + 
int m_numHashMVStoreds[NUM_REF_PIC_LIST_01][MAX_NUM_REF]; + Mv m_hashMVStoreds[NUM_REF_PIC_LIST_01][MAX_NUM_REF][5]; // Misc. Pel *m_pTempPel; @@ -150,8 +163,9 @@ protected: Mv m_integerMv2Nx2N [NUM_REF_PIC_LIST_01][MAX_NUM_REF]; bool m_isInitialized; - unsigned int m_numBVs, m_numBV16s; - Mv m_acBVs[IBC_NUM_CANDIDATES]; + + Mv m_acBVs[2 * IBC_NUM_CANDIDATES]; + unsigned int m_numBVs; bool m_useCompositeRef; Distortion m_estMinDistSbt[NUMBER_SBT_MODE + 1]; // estimated minimum SSE value of the PU if using a SBT mode uint8_t m_sbtRdoOrder[NUMBER_SBT_MODE]; // order of SBT mode in RDO @@ -228,8 +242,81 @@ public: m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize); } } + void resetUniMvList() { m_uniMvListIdx = 0; m_uniMvListSize = 0; } + void insertUniMvCands(CompArea blkArea, Mv cMvTemp[2][33]) + { + BlkUniMvInfo* curMvInfo = m_uniMvList + m_uniMvListIdx; + int j = 0; + for (; j < m_uniMvListSize; j++) + { + BlkUniMvInfo* prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + if ((blkArea.x == prevMvInfo->x) && (blkArea.y == prevMvInfo->y) && (blkArea.width == prevMvInfo->w) && (blkArea.height == prevMvInfo->h)) + { + break; + } + } + + if (j < m_uniMvListSize) + { + curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + } + + ::memcpy(curMvInfo->uniMvs, cMvTemp, 2 * 33 * sizeof(Mv)); + if (j == m_uniMvListSize) // new element + { + curMvInfo->x = blkArea.x; + curMvInfo->y = blkArea.y; + curMvInfo->w = blkArea.width; + curMvInfo->h = blkArea.height; + m_uniMvListSize = std::min(m_uniMvListSize + 1, m_uniMvListMaxSize); + m_uniMvListIdx = (m_uniMvListIdx + 1) % (m_uniMvListMaxSize); + } + } + void savePrevUniMvInfo(CompArea blkArea, BlkUniMvInfo &tmpUniMvInfo, bool& isUniMvInfoSaved) + { + int j = 0; + BlkUniMvInfo* curUniMvInfo = nullptr; + for (; j < m_uniMvListSize; j++) + { + curUniMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % 
(m_uniMvListMaxSize)); + if ((blkArea.x == curUniMvInfo->x) && (blkArea.y == curUniMvInfo->y) && (blkArea.width == curUniMvInfo->w) && (blkArea.height == curUniMvInfo->h)) + { + break; + } + } + + if (j < m_uniMvListSize) + { + isUniMvInfoSaved = true; + tmpUniMvInfo = *curUniMvInfo; + } + } + void addUniMvInfo(BlkUniMvInfo &tmpUniMVInfo) + { + int j = 0; + BlkUniMvInfo* prevUniMvInfo = nullptr; + for (; j < m_uniMvListSize; j++) + { + prevUniMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); + if ((tmpUniMVInfo.x == prevUniMvInfo->x) && (tmpUniMVInfo.y == prevUniMvInfo->y) && (tmpUniMVInfo.w == prevUniMvInfo->w) && (tmpUniMVInfo.h == prevUniMvInfo->h)) + { + break; + } + } + if (j < m_uniMvListSize) + { + *prevUniMvInfo = tmpUniMVInfo; + } + else + { + m_uniMvList[m_uniMvListIdx] = tmpUniMVInfo; + m_uniMvListIdx = (m_uniMvListIdx + 1) % m_uniMvListMaxSize; + m_uniMvListSize = std::min(m_uniMvListSize + 1, m_uniMvListMaxSize); + } + } void resetSavedAffineMotion(); - void storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int gbiIdx ); + void storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx ); + bool searchBv(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize); protected: /// sub-function for motion vector refinement used in fractional-pel accuracy @@ -257,6 +344,7 @@ protected: uint8_t ucPointNr; int subShiftMode; unsigned imvShift; + bool useAltHpelIf; bool inCtuSearch; bool zeroMV; } IntTZSearchStruct; @@ -281,14 +369,22 @@ public: bool predIBCSearch ( CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap); void xIntraPatternSearch ( PredictionUnit& pu, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* cMvSrchRngLT, Mv* cMvSrchRngRB, Mv* pcMvPred); void xSetIntraSearchRange ( 
PredictionUnit& pu, int iRoiWidth, int iRoiHeight, const int localSearchRangeX, const int localSearchRangeY, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB); - void resetIbcSearch() { m_numBVs = m_numBV16s = 0; } + void resetIbcSearch() + { + for (int i = 0; i < IBC_NUM_CANDIDATES; i++) + { + m_defaultCachedBvs.m_bvCands[i].setZero(); + } + m_defaultCachedBvs.currCnt = 0; + } void xIBCEstimation ( PredictionUnit& pu, PelUnitBuf& origBuf, Mv *pcMvPred, Mv &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY); void xIBCSearchMVCandUpdate ( Distortion uiSad, int x, int y, Distortion* uiSadBestCand, Mv* cMVCand); int xIBCSearchMVChromaRefine( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, int cuPelX, int cuPelY, Distortion* uiSadBestCand, Mv* cMVCand); void addToSortList(std::list<BlockHash>& listBlockHash, std::list<int>& listCost, int cost, const BlockHash& blockHash); bool predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch); bool xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch); - int xHashInterPredME(const PredictionUnit& pu, RefPicList currRefPicList, int currRefPicIndex, Mv bestMv[5]); + bool xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch); + void selectRectangleMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& listBlockHash, const BlockHash& currBlockHash, int width, int height, int idxNonSimple, unsigned int* &hashValues, int baseNum, int picWidth, int picHeight, bool isHorizontal, uint16_t* curHashPic); void selectMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& vecBlockHash, const BlockHash& currBlockHash); protected: @@ -326,7 +422,7 @@ protected: RefPicList eRefPicList, int iRefIdx ); - uint32_t xCalcAffineMVBits ( PredictionUnit& pu, Mv 
mvCand[3], Mv mvPred[3], bool mvHighPrec = false ); + uint32_t xCalcAffineMVBits ( PredictionUnit& pu, Mv mvCand[3], Mv mvPred[3] ); void xCopyAMVPInfo ( AMVPInfo* pSrc, AMVPInfo* pDst ); uint32_t xGetMvpIdxBits ( int iIdx, int iNum ); @@ -352,6 +448,8 @@ protected: ); void xTZSearch ( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, @@ -361,6 +459,8 @@ protected: ); void xTZSearchSelective ( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, @@ -375,6 +475,8 @@ protected: ); void xPatternSearchFast ( const PredictionUnit& pu, + RefPicList eRefPicList, + int iRefIdxPred, IntTZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiSAD, @@ -415,9 +517,9 @@ protected: Mv hevcMv[2][33] , Mv mvAffine4Para[2][33][3] , int refIdx4Para[2] - , uint8_t gbiIdx = GBI_DEFAULT - , bool enforceGBiPred = false - , uint32_t gbiIdxBits = 0 + , uint8_t bcwIdx = BCW_DEFAULT + , bool enforceBcwPred = false + , uint32_t bcwIdxBits = 0 ); void xAffineMotionEstimation ( PredictionUnit& pu, @@ -447,24 +549,24 @@ protected: void xCopyAffineAMVPInfo ( AffineAMVPInfo& src, AffineAMVPInfo& dst ); void xCheckBestAffineMVP ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost ); - Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int gbiIdx ); + Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int bcwIdx ); Distortion xSymmeticRefineMvSearch( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred - , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, 
uint32_t uiMaxSearchRounds , int gbiIdx ); + , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds , int bcwIdx ); - void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx ); + void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx ); bool xReadBufferedAffineUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost , int& mvpIdx, const AffineAMVPInfo& aamvpi ); - double xGetMEDistortionWeight ( uint8_t gbiIdx, RefPicList eRefPicList); + double xGetMEDistortionWeight ( uint8_t bcwIdx, RefPicList eRefPicList); bool xReadBufferedUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost); - void xClipMv ( Mv& rcMv, const struct Position& pos, const struct Size& size, const class SPS& sps ); + void xClipMv ( Mv& rcMv, const struct Position& pos, const struct Size& size, const class SPS& sps, const class PPS& pps ); public: void resetBufferedUniMotions () { m_uniMotions.reset(); } - uint32_t getWeightIdxBits ( uint8_t gbiIdx ) { return m_estWeightIdxBits[gbiIdx]; } + uint32_t getWeightIdxBits ( uint8_t bcwIdx ) { return m_estWeightIdxBits[bcwIdx]; } void initWeightIdxBits (); void symmvdCheckBestMvp( PredictionUnit& pu, @@ -472,7 +574,7 @@ public: Mv curMv, RefPicList curRefList, AMVPInfo amvpInfo[2][33], - int32_t gbiIdx, + int32_t bcwIdx, Mv cMvPredSym[2], int32_t mvpIdxSym[2], Distortion& bestCost, @@ -480,7 +582,7 @@ public: ); protected: - void xExtDIFUpSamplingH ( CPelBuf* pcPattern ); + void 
xExtDIFUpSamplingH(CPelBuf* pcPattern, bool useAltHpelIf); void xExtDIFUpSamplingQ ( CPelBuf* pcPatternKey, Mv halfPelRef ); uint32_t xDetermineBestMvp ( PredictionUnit& pu, Mv acMvTemp[3], int& mvpIdx, const AffineAMVPInfo& aamvpi ); // ------------------------------------------------------------------------------------------------------------------- @@ -498,8 +600,10 @@ public: void xEncodeInterResidualQT (CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID); void xEstimateInterResidualQT (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL , const bool luma = true, const bool chroma = true + , PelUnitBuf* orgResi = NULL ); uint64_t xGetSymbolFracBitsInter (CodingStructure &cs, Partitioner &partitioner); + uint64_t xCalcPuMeBits (PredictionUnit& pu); };// END CLASS DEFINITION EncSearch diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 4e2852e5fdb03188ddb80fea56aa73d2a2da0a73..ebc5ab44132e81daeafa53e611292b0ec49b2815 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -49,10 +49,9 @@ #include <math.h> #include <limits> - //! \ingroup EncoderLib //! 
\{ - +#define PLTCtx(c) SubCtx( Ctx::Palette, c ) IntraSearch::IntraSearch() : m_pSplitCS (nullptr) , m_pFullCS (nullptr) @@ -69,6 +68,17 @@ IntraSearch::IntraSearch() { m_pSharedPredTransformSkip[ch] = nullptr; } + m_truncBinBits = nullptr; + m_escapeNumBins = nullptr; + m_minErrorIndexMap = nullptr; + for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++) + { + m_indexError[i] = nullptr; + } + for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++) + { + m_statePtRDOQ[i] = nullptr; + } } @@ -151,7 +161,44 @@ void IntraSearch::destroy() } m_tmpStorageLCU.destroy(); + m_colorTransResiBuf.destroy(); m_isInitialized = false; + if (m_truncBinBits != nullptr) + { + for (unsigned i = 0; i < m_symbolSize; i++) + { + delete[] m_truncBinBits[i]; + m_truncBinBits[i] = nullptr; + } + delete[] m_truncBinBits; + m_truncBinBits = nullptr; + } + if (m_escapeNumBins != nullptr) + { + delete[] m_escapeNumBins; + m_escapeNumBins = nullptr; + } + if (m_indexError[0] != nullptr) + { + for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++) + { + delete[] m_indexError[i]; + m_indexError[i] = nullptr; + } + } + if (m_minErrorIndexMap != nullptr) + { + delete[] m_minErrorIndexMap; + m_minErrorIndexMap = nullptr; + } + if (m_statePtRDOQ[0] != nullptr) + { + for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++) + { + delete[] m_statePtRDOQ[i]; + m_statePtRDOQ[i] = nullptr; + } + } } IntraSearch::~IntraSearch() @@ -171,6 +218,7 @@ void IntraSearch::init( EncCfg* pcEncCfg, const uint32_t maxCUHeight, const uint32_t maxTotalCUDepth , EncReshape* pcReshape + , const unsigned bitDepthY ) { CHECK(m_isInitialized, "Already initialized"); @@ -185,6 +233,7 @@ void IntraSearch::init( EncCfg* pcEncCfg, IntraPrediction::init( cform, pcEncCfg->getBitDepth( CHANNEL_TYPE_LUMA ) ); m_tmpStorageLCU.create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); + m_colorTransResiBuf.create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ ) { @@ -218,8 +267,9 @@ void 
IntraSearch::init( EncCfg* pcEncCfg, m_pBestCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); m_pTempCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); - m_pBestCS[width][height]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false ); - m_pTempCS[width][height]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false ); + m_pBestCS[width][height]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode()); + m_pTempCS[width][height]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode()); + m_pFullCS [width][height] = new CodingStructure*[uiNumLayersToAllocateFull]; m_pSplitCS[width][height] = new CodingStructure*[uiNumLayersToAllocateSplit]; @@ -227,14 +277,13 @@ void IntraSearch::init( EncCfg* pcEncCfg, { m_pFullCS [width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); - m_pFullCS [width][height][layer]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false ); + m_pFullCS[width][height][layer]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode()); } for( uint32_t layer = 0; layer < uiNumLayersToAllocateSplit; layer++ ) { m_pSplitCS[width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); - - m_pSplitCS[width][height][layer]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), 
gp_sizeIdxInfo->sizeFrom( height ) ), false ); + m_pSplitCS[width][height][layer]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode()); } } else @@ -255,116 +304,172 @@ void IntraSearch::init( EncCfg* pcEncCfg, for( uint32_t depth = 0; depth < uiNumSaveLayersToAllocate; depth++ ) { m_pSaveCS[depth] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache ); - m_pSaveCS[depth]->create( UnitArea( cform, Area( 0, 0, maxCUWidth, maxCUHeight ) ), false ); + m_pSaveCS[depth]->create(UnitArea(cform, Area(0, 0, maxCUWidth, maxCUHeight)), false, (bool)pcEncCfg->getPLTMode()); } m_isInitialized = true; + if (pcEncCfg->getPLTMode()) + { + m_symbolSize = (1 << bitDepthY); // pixel values are within [0, SymbolSize-1] with size SymbolSize + if (m_truncBinBits == nullptr) + { + m_truncBinBits = new uint16_t*[m_symbolSize]; + for (unsigned i = 0; i < m_symbolSize; i++) + { + m_truncBinBits[i] = new uint16_t[m_symbolSize + 1]; + } + } + if (m_escapeNumBins == nullptr) + { + m_escapeNumBins = new uint16_t[m_symbolSize]; + } + initTBCTable(bitDepthY); + if (m_indexError[0] == nullptr) + { + for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++) + { + m_indexError[i] = new double[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + } + } + if (m_minErrorIndexMap == nullptr) + { + m_minErrorIndexMap = new uint8_t[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + } + if (m_statePtRDOQ[0] == nullptr) + { + for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++) + { + m_statePtRDOQ[i] = new uint8_t[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + } + } + } } ////////////////////////////////////////////////////////////////////////// // INTRA PREDICTION ////////////////////////////////////////////////////////////////////////// +static constexpr double COST_UNKNOWN = -65536.0; + +double IntraSearch::findInterCUCost( CodingUnit &cu ) +{ + if( cu.isConsIntra() && !cu.slice->isIntra() ) + { + //search 
corresponding inter CU cost + for( int i = 0; i < m_numCuInSCIPU; i++ ) + { + if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() ) + { + return m_cuCostInSCIPU[i]; + } + } + } + return COST_UNKNOWN; +} -void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar ) +bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst, CodingStructure* bestCS) { CodingStructure &cs = *cu.cs; const SPS &sps = *cs.sps; - const uint32_t uiWidthBit = g_aucLog2[partitioner.currArea().lwidth() ]; - const uint32_t uiHeightBit = g_aucLog2[partitioner.currArea().lheight()]; + const uint32_t uiWidthBit = floorLog2(partitioner.currArea().lwidth() ); + const uint32_t uiHeightBit = floorLog2(partitioner.currArea().lheight()); // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1. 
- const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS); - + const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE; //===== loop over partitions ===== const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); + const TempCtx ctxStartMipFlag ( m_CtxCache, SubCtx( Ctx::MipFlag, m_CABACEstimator->getCtx() ) ); + const TempCtx ctxStartIspMode ( m_CtxCache, SubCtx( Ctx::ISPMode, m_CABACEstimator->getCtx() ) ); + const TempCtx ctxStartPlanarFlag ( m_CtxCache, SubCtx( Ctx::IntraLumaPlanarFlag, m_CABACEstimator->getCtx() ) ); const TempCtx ctxStartIntraMode(m_CtxCache, SubCtx(Ctx::IntraLumaMpmFlag, m_CABACEstimator->getCtx())); - const TempCtx ctxStartMHIntraMode ( m_CtxCache, SubCtx( Ctx::MHIntraPredMode, m_CABACEstimator->getCtx() ) ); const TempCtx ctxStartMrlIdx ( m_CtxCache, SubCtx( Ctx::MultiRefLineIdx, m_CABACEstimator->getCtx() ) ); CHECK( !cu.firstPU, "CU has no PUs" ); const bool keepResi = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; + // variables for saving fast intra modes scan results across multiple LFNST passes + bool LFNSTLoadFlag = sps.getUseLFNST() && cu.lfnstIdx != 0; + bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0; + + LFNSTSaveFlag &= sps.getUseIntraMTS() ? cu.mtsFlag == 0 : true; + + const uint32_t lfnstIdx = cu.lfnstIdx; + double costInterCU = findInterCUCost( cu ); + + const int width = partitioner.currArea().lwidth(); + const int height = partitioner.currArea().lheight(); + + // Marking MTS usage for faster MTS + // 0: MTS is either not applicable for current CU (cuWidth > MTS_INTRA_MAX_CU_SIZE or cuHeight > MTS_INTRA_MAX_CU_SIZE), not active in the config file or the fast decision algorithm is not used in this case + // 1: MTS fast algorithm can be applied for the current CU, and the DCT2 is being checked + // 2: MTS is being checked for current CU. 
Stored results of DCT2 can be utilized for speedup + uint8_t mtsUsageFlag = 0; + const int maxSizeEMT = MTS_INTRA_MAX_CU_SIZE; + if( width <= maxSizeEMT && height <= maxSizeEMT && sps.getUseIntraMTS() ) + { + mtsUsageFlag = ( sps.getUseLFNST() && cu.mtsFlag == 1 ) ? 2 : 1; + } + + if( width * height < 64 && !m_pcEncCfg->getUseFastLFNST() ) + { + mtsUsageFlag = 0; + } - uint32_t extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3 + const bool colorTransformIsEnabled = sps.getUseColorTrans() && !CS::isDualITree(cs); + const bool isFirstColorSpace = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform)); + const bool isSecondColorSpace = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform)); - const int width = partitioner.currArea().lwidth(); - const int height = partitioner.currArea().lheight(); - int nOptionsForISP = NUM_INTRA_SUBPARTITIONS_MODES; double bestCurrentCost = bestCostSoFar; + bool ispCanBeUsed = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()); + bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace); + bool testISP = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform); - int ispOptions[NUM_INTRA_SUBPARTITIONS_MODES] = { 0 }; - if( nOptionsForISP > 1 ) + if ( saveDataForISP ) { -#if MAX_TB_SIZE_SIGNALLING - auto splitsThatCanBeUsedForISP = CU::canUseISPSplit( width, height, cu.cs->sps->getMaxTbSize() ); -#else - auto splitsThatCanBeUsedForISP = CU::canUseISPSplit( width, height, MAX_TB_SIZEY ); -#endif - if( splitsThatCanBeUsedForISP == CAN_USE_VER_AND_HORL_SPLITS ) - { - const CodingUnit* cuLeft = cu.ispMode != NOT_INTRA_SUBPARTITIONS ? 
cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( -1, 0 ), partitioner.chType ) : nullptr; - const CodingUnit* cuAbove = cu.ispMode != NOT_INTRA_SUBPARTITIONS ? cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( 0, -1 ), partitioner.chType ) : nullptr; - bool ispHorIsFirstTest = CU::firstTestISPHorSplit( width, height, COMPONENT_Y, cuLeft, cuAbove ); - if( ispHorIsFirstTest ) - { - ispOptions[1] = HOR_INTRA_SUBPARTITIONS; - ispOptions[2] = VER_INTRA_SUBPARTITIONS; - } - else - { - ispOptions[1] = VER_INTRA_SUBPARTITIONS; - ispOptions[2] = HOR_INTRA_SUBPARTITIONS; - } - } - else if( splitsThatCanBeUsedForISP == HOR_INTRA_SUBPARTITIONS ) - { - nOptionsForISP = 2; - ispOptions[1] = HOR_INTRA_SUBPARTITIONS; - } - else if( splitsThatCanBeUsedForISP == VER_INTRA_SUBPARTITIONS ) - { - nOptionsForISP = 2; - ispOptions[1] = VER_INTRA_SUBPARTITIONS; - } - else - { - nOptionsForISP = 1; - } + //reset the intra modes lists variables + m_ispCandListHor.clear(); + m_ispCandListVer.clear(); } - if( nOptionsForISP > 1 ) + if( testISP ) { - //variables for the full RD list without MRL modes - m_rdModeListWithoutMrl .clear(); - m_rdModeListWithoutMrlHor .clear(); - m_rdModeListWithoutMrlVer .clear(); - //variables with data from regular intra used to skip ISP splits - m_intraModeDiagRatio .clear(); - m_intraModeHorVerRatio .clear(); - m_intraModeTestedNormalIntra.clear(); + //reset the variables used for the tests + m_regIntraRDListWithCosts.clear(); + int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); + int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT)); + m_ispTestedModes[0].init( numTotalPartsHor, numTotalPartsVer ); + //the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with ISP due to size restrictions + numTotalPartsHor = sps.getUseLFNST() && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? 
numTotalPartsHor : 0; + numTotalPartsVer = sps.getUseLFNST() && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0; + for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++) + { + m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer); + } } - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; + const bool testBDPCM = (sps.getBDPCMEnabled()!=0) && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType)) && cu.mtsFlag == 0 && cu.lfnstIdx == 0; + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList; - static_vector<int, FAST_UDI_MAX_RDMODE_NUM> extendRefList; - static_vector<int, FAST_UDI_MAX_RDMODE_NUM>* nullList = NULL; - auto &pu = *cu.firstPU; + bool validReturn = false; { CandHadList.clear(); CandCostList.clear(); uiHadModeList.clear(); - extendRefList.clear(); CHECK(pu.cu != &cu, "PU is not contained in the CU"); //===== determine set of modes to be tested (using prediction signal only) ===== int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes - static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeList; + const bool fastMip = sps.getUseMIP() && m_pcEncCfg->getUseFastMIP(); + const bool mipAllowed = sps.getUseMIP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.firstPU->lumaSize())); + const bool testMip = mipAllowed && !(cu.lwidth() > (8 * cu.lheight()) || cu.lheight() > (8 * cu.lwidth())); + const bool supportedMipBlkSize = pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT; + + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList; int numModesForFullRD = 3; numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2]; @@ -373,383 +478,507 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, numModesForFullRD = numModesAvailable; #endif + if 
(isSecondColorSpace) { - // this should always be true - CHECK( !pu.Y().valid(), "PU is not valid" ); -#if ENABLE_JVET_L0283_MRL - bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y)&((pu.cs->sps)->getMaxCUWidth() - 1)) == 0); - int numOfPassesExtendRef = (isFirstLineOfCtu ? 1 : MRL_NUM_REF_LINES); -#endif - pu.multiRefIdx = 0; - - //===== init pattern for luma prediction ===== - initIntraPatternChType( cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, false, pu ) ); - if( numModesForFullRD != numModesAvailable ) + uiRdModeList.clear(); + if (m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx] > 0) + { + for (int i = 0; i < m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]; i++) + { + uiRdModeList.push_back(m_savedRdModeFirstColorSpace[m_savedRdModeIdx][i]); + } + } + else { - CHECK( numModesForFullRD >= numModesAvailable, "Too many modes for full RD search" ); + return false; + } + } + else + { + if (mtsUsageFlag != 2) + { + // this should always be true + CHECK(!pu.Y().valid(), "PU is not valid"); + bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0); + int numOfPassesExtendRef = ((!sps.getUseMRL() || isFirstLineOfCtu) ? 
1 : MRL_NUM_REF_LINES); + pu.multiRefIdx = 0; + + if (numModesForFullRD != numModesAvailable) + { + CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search"); - const CompArea &area = pu.Y(); + const CompArea &area = pu.Y(); - PelBuf piOrg = cs.getOrgBuf(area); - PelBuf piPred = cs.getPredBuf(area); + PelBuf piOrg = cs.getOrgBuf(area); + PelBuf piPred = cs.getPredBuf(area); - DistParam distParam; + DistParam distParamSad; + DistParam distParamHad; + if (cu.slice->getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpOrg = m_tmpStorageLCU.getBuf(tmpArea); + tmpOrg.copyFrom(piOrg); + tmpOrg.rspSignal(m_pcReshape->getFwdLUT()); + m_pcRdCost->setDistParam(distParamSad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, + false); // Use SAD cost + m_pcRdCost->setDistParam(distParamHad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, + true); // Use HAD (SATD) cost + } + else + { + m_pcRdCost->setDistParam(distParamSad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, + false); // Use SAD cost + m_pcRdCost->setDistParam(distParamHad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, + true); // Use HAD (SATD) cost + } - const bool bUseHadamard = cu.transQuantBypass == 0; + distParamSad.applyWeight = false; + distParamHad.applyWeight = false; - if (cu.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) - { - CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); - PelBuf tmpOrg = m_tmpStorageLCU.getBuf(tmpArea); - tmpOrg.copyFrom(piOrg); - tmpOrg.rspSignal(m_pcReshape->getFwdLUT()); - m_pcRdCost->setDistParam(distParam, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); - } - else - m_pcRdCost->setDistParam(distParam, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); + if 
(testMip && supportedMipBlkSize) + { + numModesForFullRD += fastMip + ? std::max(numModesForFullRD, floorLog2(std::min(pu.lwidth(), pu.lheight())) - 1) + : numModesForFullRD; + } + const int numHadCand = (testMip ? 2 : 1) * 3; - distParam.applyWeight = false; + //*** Derive (regular) candidates using Hadamard + cu.mipFlag = false; - bool bSatdChecked[NUM_INTRA_MODE]; - memset( bSatdChecked, 0, sizeof( bSatdChecked ) ); + //===== init pattern for luma prediction ===== + initIntraPatternChType(cu, pu.Y(), true); + bool bSatdChecked[NUM_INTRA_MODE]; + memset(bSatdChecked, 0, sizeof(bSatdChecked)); - { - for( int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ ) + if (!LFNSTLoadFlag) { - uint32_t uiMode = modeIdx; - Distortion uiSad = 0; - - // Skip checking extended Angular modes in the first round of SATD - if( uiMode > DC_IDX && ( uiMode & 1 ) ) + for (int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++) { - continue; - } - - bSatdChecked[uiMode] = true; + uint32_t uiMode = modeIdx; + Distortion minSadHad = 0; - pu.intraDir[0] = modeIdx; + // Skip checking extended Angular modes in the first round of SATD + if (uiMode > DC_IDX && (uiMode & 1)) + { + continue; + } - if( useDPCMForFirstPassIntraEstimation( pu, uiMode ) ) - { - encPredIntraDPCM( COMPONENT_Y, piOrg, piPred, uiMode ); - } - else - { - predIntraAng( COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, true, pu ) ); - } - // use Hadamard transform here - uiSad += distParam.distFunc(distParam); + bSatdChecked[uiMode] = true; - // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. 
- m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode ); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); + pu.intraDir[0] = modeIdx; - uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA); + initPredIntraParams(pu, pu.Y(), sps); + predIntraAng(COMPONENT_Y, piPred, pu); + // Use the min between SAD and HAD as the cost criterion + // SAD is scaled by 2 to align with the scaling of HAD + minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); - double cost = ( double ) uiSad + ( double ) fracModeBits * sqrtLambdaForFirstPass; + // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. + m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag ); + m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode ); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); + m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); - DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", uiSad, fracModeBits, cost, uiMode ); + uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA); - updateCandList( uiMode, cost, uiRdModeList, CandCostList - , extendRefList, 0 - , numModesForFullRD + extraModes ); - updateCandList(uiMode, (double) uiSad, uiHadModeList, CandHadList - , *nullList, -1 - , 3 + extraModes); - } - } // NSSTFlag + double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; - // forget the extra modes - uiRdModeList.resize( numModesForFullRD ); - CandCostList.resize(numModesForFullRD); - extendRefList.resize(numModesForFullRD); - static_vector<unsigned, FAST_UDI_MAX_RDMODE_NUM> parentCandList(FAST_UDI_MAX_RDMODE_NUM); - 
std::copy_n(uiRdModeList.begin(), numModesForFullRD, parentCandList.begin()); + DTRACE(g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiMode); - // Second round of SATD for extended Angular modes - for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++) - { - unsigned parentMode = parentCandList[modeIdx]; - if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1)) + updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList, + CandCostList, numModesForFullRD); + updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), double(minSadHad), + uiHadModeList, CandHadList, numHadCand); + } + if (!sps.getUseMIP() && LFNSTSaveFlag) + { + // save found best modes + m_uiSavedNumRdModesLFNST = numModesForFullRD; + m_uiSavedRdModeListLFNST = uiRdModeList; + m_dSavedModeCostLFNST = CandCostList; + // PBINTRA fast + m_uiSavedHadModeListLFNST = uiHadModeList; + m_dSavedHadListLFNST = CandHadList; + LFNSTSaveFlag = false; + } + } // NSSTFlag + if (!sps.getUseMIP() && LFNSTLoadFlag) + { + // restore saved modes + numModesForFullRD = m_uiSavedNumRdModesLFNST; + uiRdModeList = m_uiSavedRdModeListLFNST; + CandCostList = m_dSavedModeCostLFNST; + // PBINTRA fast + uiHadModeList = m_uiSavedHadModeListLFNST; + CandHadList = m_dSavedHadListLFNST; + } // !LFNSTFlag + + if (!(sps.getUseMIP() && LFNSTLoadFlag)) { - for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2) + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> parentCandList = uiRdModeList; + + // Second round of SATD for extended Angular modes + for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++) { - unsigned mode = parentMode + subModeIdx; + unsigned parentMode = parentCandList[modeIdx].modeId; + if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1)) + { + for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2) + { + unsigned mode = parentMode + subModeIdx; + if (!bSatdChecked[mode]) + { + 
pu.intraDir[0] = mode; - if (!bSatdChecked[mode]) - { - pu.intraDir[0] = mode; + initPredIntraParams(pu, pu.Y(), sps); + predIntraAng(COMPONENT_Y, piPred, pu); - if (useDPCMForFirstPassIntraEstimation(pu, mode)) - { - encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode); - } - else - { - predIntraAng(COMPONENT_Y, piPred, pu, - IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu)); - } - // use Hadamard transform here - Distortion sad = distParam.distFunc(distParam); + // Use the min between SAD and SATD as the cost criterion + // SAD is scaled by 2 to align with the scaling of HAD + Distortion minSadHad = + std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); - // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. - m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode ); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); + // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been + // pre-estimated. 
+ m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx); - uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA); + uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA); - double cost = (double) sad + (double) fracModeBits * sqrtLambdaForFirstPass; + double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; - updateCandList(mode, cost, uiRdModeList, CandCostList - , extendRefList, 0 - , numModesForFullRD); - updateCandList(mode, (double)sad, uiHadModeList, CandHadList - , *nullList, -1 - , 3); + updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList, + CandCostList, numModesForFullRD); + updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad), + uiHadModeList, CandHadList, numHadCand); - bSatdChecked[mode] = true; + bSatdChecked[mode] = true; + } + } } } - } - } - if( nOptionsForISP > 1 ) - { - //we save the list with no mrl modes to keep only the Hadamard selected modes (no mpms) - m_rdModeListWithoutMrl.resize( numModesForFullRD ); - std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_rdModeListWithoutMrl.begin() ); - } -#if ENABLE_JVET_L0283_MRL - pu.multiRefIdx = 1; - const int numMPMs = NUM_MOST_PROBABLE_MODES; - unsigned multiRefMPM [numMPMs]; - PU::getIntraMPMs(pu, multiRefMPM); - for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++) - { - int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; - - pu.multiRefIdx = multiRefIdx; - { - initIntraPatternChType(cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, false, pu)); - } - for (int x = 0; x < numMPMs; x++) - { - uint32_t mode = 
multiRefMPM[x]; + if (saveDataForISP) { - pu.intraDir[0] = mode; + // we save the regular intra modes list + m_ispCandListHor = uiRdModeList; + } + pu.multiRefIdx = 1; + const int numMPMs = NUM_MOST_PROBABLE_MODES; + unsigned multiRefMPM[numMPMs]; + PU::getIntraMPMs(pu, multiRefMPM); + for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++) + { + int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; - if (useDPCMForFirstPassIntraEstimation(pu, mode)) + pu.multiRefIdx = multiRefIdx; { - encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode); + initIntraPatternChType(cu, pu.Y(), true); } - else + for (int x = 1; x < numMPMs; x++) { - predIntraAng(COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu)); + uint32_t mode = multiRefMPM[x]; + { + pu.intraDir[0] = mode; + initPredIntraParams(pu, pu.Y(), sps); + + predIntraAng(COMPONENT_Y, piPred, pu); + + // Use the min between SAD and SATD as the cost criterion + // SAD is scaled by 2 to align with the scaling of HAD + Distortion minSadHad = + std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); + + // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. 
+ m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx); + + uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA); + + double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; + updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList, + CandCostList, numModesForFullRD); + updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad), + uiHadModeList, CandHadList, numHadCand); + } } + } + CHECKD(uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size"); - // use Hadamard transform here - Distortion sad = distParam.distFunc(distParam); + if (LFNSTSaveFlag && testMip + && !allowLfnstWithMip(cu.firstPU->lumaSize())) // save a different set for the next run + { + // save found best modes + m_uiSavedRdModeListLFNST = uiRdModeList; + m_dSavedModeCostLFNST = CandCostList; + // PBINTRA fast + m_uiSavedHadModeListLFNST = uiHadModeList; + m_dSavedHadListLFNST = CandHadList; + m_uiSavedNumRdModesLFNST = + g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2]; + m_uiSavedRdModeListLFNST.resize(m_uiSavedNumRdModesLFNST); + m_dSavedModeCostLFNST.resize(m_uiSavedNumRdModesLFNST); + // PBINTRA fast + m_uiSavedHadModeListLFNST.resize(3); + m_dSavedHadListLFNST.resize(3); + LFNSTSaveFlag = false; + } + //*** Derive MIP candidates using Hadamard + if (testMip && !supportedMipBlkSize) + { + // avoid estimation for unsupported blk sizes + const int transpOff = getNumModesMip(pu.Y()); + const int numModesFull = (transpOff << 1); + for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; 
uiModeFull++) + { + const bool isTransposed = (uiModeFull >= transpOff ? true : false); + const uint32_t uiMode = (isTransposed ? uiModeFull - transpOff : uiModeFull); - // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. - m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode ); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); + numModesForFullRD++; + uiRdModeList.push_back(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode)); + CandCostList.push_back(0); + } + } + else if (testMip) + { + cu.mipFlag = true; + pu.multiRefIdx = 0; - uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA); + double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE }; - double cost = (double)sad + (double)fracModeBits * sqrtLambdaForFirstPass; - updateCandList(mode, cost, uiRdModeList, CandCostList, extendRefList, multiRefIdx, numModesForFullRD); - } - } - } -#endif - CandCostList.resize(numModesForFullRD); - extendRefList.resize(numModesForFullRD); - if( m_pcEncCfg->getFastUDIUseMPMEnabled() ) - { - const int numMPMs = NUM_MOST_PROBABLE_MODES; - unsigned uiPreds[numMPMs]; + initIntraPatternChType(cu, pu.Y()); + initIntraMip(pu, pu.Y()); - pu.multiRefIdx = 0; + const int transpOff = getNumModesMip(pu.Y()); + const int numModesFull = (transpOff << 1); + for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++) + { + const bool isTransposed = (uiModeFull >= transpOff ? true : false); + const uint32_t uiMode = (isTransposed ? 
uiModeFull - transpOff : uiModeFull); - const int numCand = PU::getIntraMPMs( pu, uiPreds ); + pu.mipTransposedFlag = isTransposed; + pu.intraDir[CHANNEL_TYPE_LUMA] = uiMode; + predIntraMip(COMPONENT_Y, piPred, pu); - for( int j = 0; j < numCand; j++ ) - { - bool mostProbableModeIncluded = false; - int mostProbableMode = uiPreds[j]; + // Use the min between SAD and HAD as the cost criterion + // SAD is scaled by 2 to align with the scaling of HAD + Distortion minSadHad = + std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); + m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); - for( int i = 0; i < numModesForFullRD; i++ ) - { - mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i] && extendRefList[i] == 0); + uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA); + + double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass; + mipHadCost[uiModeFull] = cost; + DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, + uiModeFull); + + updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList, + CandCostList, numModesForFullRD + 1); + updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode), + 0.8 * double(minSadHad), uiHadModeList, CandHadList, numHadCand); + } + + const double thresholdHadCost = 1.0 + 1.4 / sqrt((double) (pu.lwidth() * pu.lheight())); + reduceHadCandList(uiRdModeList, CandCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu, + fastMip); } - if( !mostProbableModeIncluded ) + if (sps.getUseMIP() && LFNSTSaveFlag) { - extendRefList.push_back(0); - numModesForFullRD++; - uiRdModeList.push_back( mostProbableMode ); + // save found best modes + m_uiSavedNumRdModesLFNST = numModesForFullRD; + m_uiSavedRdModeListLFNST = uiRdModeList; + m_dSavedModeCostLFNST = CandCostList; + // PBINTRA fast + m_uiSavedHadModeListLFNST = uiHadModeList; + 
m_dSavedHadListLFNST = CandHadList; + LFNSTSaveFlag = false; } } - if( nOptionsForISP > 1 ) + else // if( sps.getUseMIP() && LFNSTLoadFlag) { - //we add the ISP MPMs to the list without mrl modes - m_rdModeListWithoutMrlHor = m_rdModeListWithoutMrl; - m_rdModeListWithoutMrlVer = m_rdModeListWithoutMrl; - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM>* listPointer; - for( int k = 1; k < nOptionsForISP; k++ ) + // restore saved modes + numModesForFullRD = m_uiSavedNumRdModesLFNST; + uiRdModeList = m_uiSavedRdModeListLFNST; + CandCostList = m_dSavedModeCostLFNST; + // PBINTRA fast + uiHadModeList = m_uiSavedHadModeListLFNST; + CandHadList = m_dSavedHadListLFNST; + } + + if (m_pcEncCfg->getFastUDIUseMPMEnabled()) + { + const int numMPMs = NUM_MOST_PROBABLE_MODES; + unsigned uiPreds[numMPMs]; + + pu.multiRefIdx = 0; + + const int numCand = PU::getIntraMPMs(pu, uiPreds); + + for (int j = 0; j < numCand; j++) + { + bool mostProbableModeIncluded = false; + ModeInfo mostProbableMode( false, false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j] ); + + for (int i = 0; i < numModesForFullRD; i++) + { + mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]); + } + if (!mostProbableModeIncluded) + { + numModesForFullRD++; + uiRdModeList.push_back(mostProbableMode); + CandCostList.push_back(0); + } + } + if (saveDataForISP) { - cu.ispMode = ispOptions[k]; - listPointer = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? 
m_rdModeListWithoutMrlHor : m_rdModeListWithoutMrlVer ); - const int numCandISP = PU::getIntraMPMs( pu, uiPreds ); - for( int j = 0; j < numCandISP; j++ ) + // we add the MPMs to the list that contains only regular intra modes + for (int j = 0; j < numCand; j++) { - bool mostProbableModeIncluded = false; - int mostProbableMode = uiPreds[j]; + bool mostProbableModeIncluded = false; + ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j]); - for( int i = 0; i < listPointer->size(); i++ ) + for (int i = 0; i < m_ispCandListHor.size(); i++) { - mostProbableModeIncluded |= ( mostProbableMode == listPointer->at( i ) ); + mostProbableModeIncluded |= (mostProbableMode == m_ispCandListHor[i]); } - if( !mostProbableModeIncluded ) + if (!mostProbableModeIncluded) { - listPointer->push_back( mostProbableMode ); + m_ispCandListHor.push_back(mostProbableMode); } } } - cu.ispMode = NOT_INTRA_SUBPARTITIONS; } } - } - else - { - for( int i = 0; i < numModesForFullRD; i++ ) + else + { + THROW("Full search not supported for MIP"); + } + if (sps.getUseLFNST() && mtsUsageFlag == 1) { - uiRdModeList.push_back( i ); + // Store the modes to be checked with RD + m_savedNumRdModes[lfnstIdx] = numModesForFullRD; + std::copy_n(uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]); } } - } - - if( nOptionsForISP > 1 ) // we remove the non-MPMs from the ISP lists - { - static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeListCopyHor = m_rdModeListWithoutMrlHor; - m_rdModeListWithoutMrlHor.clear(); - static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeListCopyVer = m_rdModeListWithoutMrlVer; - m_rdModeListWithoutMrlVer.clear(); - static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > *listPointerCopy, *listPointer; - for( int ispOptionIdx = 1; ispOptionIdx < nOptionsForISP; ispOptionIdx++ ) + else // mtsUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked) { - cu.ispMode = ispOptions[ispOptionIdx]; - //we get the 
mpm cand list - const int numMPMs = NUM_MOST_PROBABLE_MODES; - unsigned uiPreds[numMPMs]; - - pu.multiRefIdx = 0; + if ((m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra()) && m_bestModeCostValid[lfnstIdx]) + { + numModesForFullRD = 0; - PU::getIntraMPMs( pu, uiPreds ); + double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 1.0) * (1.4 / sqrt((double) (width * height))); - //we copy only the ISP MPMs - listPointerCopy = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? uiRdModeListCopyHor : uiRdModeListCopyVer ); - listPointer = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? m_rdModeListWithoutMrlHor : m_rdModeListWithoutMrlVer ); - for( int k = 0; k < listPointerCopy->size(); k++ ) - { - for( int q = 0; q < numMPMs; q++ ) + // Skip checking the modes with much larger R-D cost than the best mode + for (int i = 0; i < m_savedNumRdModes[lfnstIdx]; i++) { - if( listPointerCopy->at( k ) == uiPreds[q] ) + if (m_modeCostStore[lfnstIdx][i] <= thresholdSkipMode * m_bestModeCostStore[lfnstIdx]) { - listPointer->push_back( listPointerCopy->at( k ) ); - break; + uiRdModeList.push_back(m_savedRdModeList[lfnstIdx][i]); + numModesForFullRD++; } } } + else // this is necessary because we skip the candidates list calculation, since it was already obtained for + // the DCT-II. Now we load it + { + // Restore the modes to be checked with RD + numModesForFullRD = m_savedNumRdModes[lfnstIdx]; + uiRdModeList.resize(numModesForFullRD); + std::copy_n(m_savedRdModeList[lfnstIdx], m_savedNumRdModes[lfnstIdx], uiRdModeList.begin()); + CandCostList.resize(numModesForFullRD); + } } - cu.ispMode = NOT_INTRA_SUBPARTITIONS; - } + CHECK(numModesForFullRD != uiRdModeList.size(), "Inconsistent state!"); - CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" 
); + // after this point, don't use numModesForFullRD - // after this point, don't use numModesForFullRD - - // PBINTRA fast - if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable ) - { - if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO ) + // PBINTRA fast + if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable + && !cs.slice->getDisableSATDForRD() && (mtsUsageFlag != 2 || lfnstIdx > 0)) { - uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 2 ) ); - extendRefList.resize( std::min<size_t>( extendRefList.size(), 2 ) ); - if( nOptionsForISP > 1 ) + double pbintraRatio = (lfnstIdx > 0) ? 1.25 : PBINTRA_RATIO; + int maxSize = -1; + ModeInfo bestMipMode; + int bestMipIdx = -1; + for (int idx = 0; idx < uiRdModeList.size(); idx++) { - m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 2 ) ); - m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 2 ) ); + if (uiRdModeList[idx].mipFlg) + { + bestMipMode = uiRdModeList[idx]; + bestMipIdx = idx; + break; + } } - } - if( CandHadList.size() < 2 || CandHadList[1] > cs.interHad * PBINTRA_RATIO ) - { - uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 1 ) ); - extendRefList.resize( std::min<size_t>( extendRefList.size(), 1 ) ); - if( nOptionsForISP > 1 ) + const int numHadCand = 3; + for (int k = numHadCand - 1; k >= 0; k--) { - m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 1 ) ); - m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 1 ) ); + if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) + { + maxSize = k; + } } - } - if( CandHadList.size() < 1 || CandHadList[0] > cs.interHad * PBINTRA_RATIO ) - { - cs.dist = std::numeric_limits<Distortion>::max(); - cs.interHad = 0; + if (maxSize > 0) + { + 
uiRdModeList.resize(std::min<size_t>(uiRdModeList.size(), maxSize)); + if (bestMipIdx >= 0) + { + if (uiRdModeList.size() <= bestMipIdx) + { + uiRdModeList.push_back(bestMipMode); + } + } + if (saveDataForISP) + { + m_ispCandListHor.resize(std::min<size_t>(m_ispCandListHor.size(), maxSize)); + } + } + if (maxSize == 0) + { + cs.dist = std::numeric_limits<Distortion>::max(); + cs.interHad = 0; - //===== reset context models ===== - m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode ); - m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx ); + //===== reset context models ===== + m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); + m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx); - return; + return false; + } } } - if ( nOptionsForISP > 1 ) - { - //we create a single full RD list that includes all intra modes using regular intra, MRL and ISP - auto* firstIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlHor : &m_rdModeListWithoutMrlVer; - auto* secondIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? 
&m_rdModeListWithoutMrlVer : &m_rdModeListWithoutMrlHor; - - if ( m_pcEncCfg->getUseFastISP() ) - { - // find the first non-MRL mode - size_t indexFirstMode = std::find( extendRefList.begin(), extendRefList.end(), 0 ) - extendRefList.begin(); - // if not found, just take the last mode - if( indexFirstMode >= extendRefList.size() ) indexFirstMode = extendRefList.size() - 1; - // move the mode indicated by indexFirstMode to the beginning - for( int idx = ((int)indexFirstMode) - 1; idx >= 0; idx-- ) - { - std::swap( extendRefList[idx], extendRefList[idx + 1] ); - std::swap( uiRdModeList [idx], uiRdModeList [idx + 1] ); - } - //insert all ISP modes after the first non-mrl mode - uiRdModeList.insert( uiRdModeList.begin() + 1, secondIspList->begin(), secondIspList->end() ); - uiRdModeList.insert( uiRdModeList.begin() + 1, firstIspList->begin() , firstIspList->end() ); + int numNonISPModes = (int)uiRdModeList.size(); - extendRefList.insert( extendRefList.begin() + 1, secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] ); - extendRefList.insert( extendRefList.begin() + 1, firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] ); - } - else + if ( testISP ) + { + // we reserve positions for ISP in the common full RD list + const int maxNumRDModesISP = sps.getUseLFNST() ? 
16 * NUM_LFNST_NUM_PER_SET : 16; + m_curIspLfnstIdx = 0; + for (int i = 0; i < maxNumRDModesISP; i++) { - //insert all ISP modes at the end of the current list - uiRdModeList.insert( uiRdModeList.end(), secondIspList->begin(), secondIspList->end() ); - uiRdModeList.insert( uiRdModeList.end(), firstIspList->begin() , firstIspList->end() ); - - extendRefList.insert( extendRefList.end(), secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] ); - extendRefList.insert( extendRefList.end(), firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] ); + uiRdModeList.push_back( ModeInfo( false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0 ) ); } } - CHECKD(uiRdModeList.size() != extendRefList.size(),"uiRdModeList and extendRefList do not have the same size!"); //===== check modes (using r-d costs) ===== - uint32_t uiBestPUMode = 0; - int bestExtendRef = 0; + ModeInfo uiBestPUMode; + int bestBDPCMMode = 0; + double bestCostNonBDPCM = MAX_DOUBLE; CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )]; CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )]; @@ -758,49 +987,72 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, csBest->slice = cs.slice; csTemp->initStructData(); csBest->initStructData(); + csTemp->picture = cs.picture; + csBest->picture = cs.picture; // just to be sure numModesForFullRD = ( int ) uiRdModeList.size(); - PartSplit intraSubPartitionsProcOrder = TU_NO_ISP; - int bestNormalIntraModeIndex = -1; - uint8_t bestIspOption = NOT_INTRA_SUBPARTITIONS; TUIntraSubPartitioner subTuPartitioner( partitioner ); - bool ispHorAllZeroCbfs = false, ispVerAllZeroCbfs = false; - - for (uint32_t uiMode = 0; uiMode < numModesForFullRD; uiMode++) + if ( testISP ) { - // set luma prediction mode - uint32_t uiOrgMode = uiRdModeList[uiMode]; + m_modeCtrl->setIspCost( MAX_DOUBLE ); + m_modeCtrl->setMtsFirstPassNoIspCost( 
MAX_DOUBLE ); + } + int bestLfnstIdx = cu.lfnstIdx; - cu.ispMode = extendRefList[uiMode] > MRL_NUM_REF_LINES ? extendRefList[uiMode] - MRL_NUM_REF_LINES : NOT_INTRA_SUBPARTITIONS; - pu.intraDir[0] = uiOrgMode; + for (int mode = isSecondColorSpace ? 0 : -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++) + { + // set CU/PU to luma prediction mode + ModeInfo uiOrgMode; + if (sps.getUseColorTrans() && !m_pcEncCfg->getRGBFormatFlag() && isSecondColorSpace && mode) + { + continue; + } - int multiRefIdx = 0; - pu.multiRefIdx = multiRefIdx; - if( cu.ispMode ) + if (mode < 0 || (isSecondColorSpace && m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode])) + { + cu.bdpcmMode = mode < 0 ? -mode : m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode]; + uiOrgMode = ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmMode == 2 ? VER_IDX : HOR_IDX ); + } + else + { + cu.bdpcmMode = 0; + uiOrgMode = uiRdModeList[mode]; + } + if (!cu.bdpcmMode && uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED) + { + if (mode == numNonISPModes) // the list needs to be sorted only once { - intraSubPartitionsProcOrder = CU::getISPType( cu, COMPONENT_Y ); - bool tuIsDividedInRows = CU::divideTuInRows( cu ); - if ( ( tuIsDividedInRows && ispHorAllZeroCbfs ) || ( !tuIsDividedInRows && ispVerAllZeroCbfs ) ) - { - continue; - } - if( m_intraModeDiagRatio.at( bestNormalIntraModeIndex ) > 1.25 ) + if (m_pcEncCfg->getUseFastISP()) { - continue; + m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId); } - if( ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) > 1.25 && tuIsDividedInRows ) || ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) < 0.8 && !tuIsDividedInRows ) ) + if (!xSortISPCandList(bestCurrentCost, csBest->cost, uiBestPUMode)) { - continue; + break; } } - else + xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? 
&uiRdModeList[mode - 1] : nullptr), Size(width, height)); + if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED) { - multiRefIdx = extendRefList[uiMode]; - pu.multiRefIdx = multiRefIdx; - CHECK( pu.multiRefIdx && ( pu.intraDir[0] == DC_IDX || pu.intraDir[0] == PLANAR_IDX ), "ERL" ); + continue; } + cu.lfnstIdx = m_curIspLfnstIdx; + uiOrgMode = uiRdModeList[mode]; + } + cu.mipFlag = uiOrgMode.mipFlg; + pu.mipTransposedFlag = uiOrgMode.mipTrFlg; + cu.ispMode = uiOrgMode.ispMod; + pu.multiRefIdx = uiOrgMode.mRefId; + pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId; + + CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported"); + CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported"); + CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported"); + CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported"); + CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported"); + pu.intraDir[CHANNEL_TYPE_CHROMA] = cu.colorTransform ? 
DM_CHROMA_IDX : pu.intraDir[CHANNEL_TYPE_CHROMA]; // set context models m_CABACEstimator->getCtx() = ctxStart; @@ -808,64 +1060,166 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, // determine residual for partition cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true ); + bool tmpValidReturn = false; if( cu.ispMode ) { - xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder ); + if ( m_pcEncCfg->getUseFastISP() ) + { + m_modeCtrl->setISPWasTested(true); + } + tmpValidReturn = xIntraCodingLumaISP(*csTemp, subTuPartitioner, bestCurrentCost); + if (csTemp->tus.size() == 0) + { + // no TUs were coded + csTemp->cost = MAX_DOUBLE; + continue; + } + // we save the data for future tests + m_ispTestedModes[m_curIspLfnstIdx].setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost); + csTemp->cost = !tmpValidReturn ? MAX_DOUBLE : csTemp->cost; } else { - xRecurIntraCodingLumaQT( *csTemp, partitioner, bestIspOption ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, bestIspOption ); - } - - if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ) - { - if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS ) + if (cu.colorTransform) { - ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost ); + tmpValidReturn = xRecurIntraCodingACTQT(*csTemp, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst); } else { - ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost ); + tmpValidReturn = xRecurIntraCodingLumaQT( + *csTemp, partitioner, uiBestPUMode.ispMod ? 
bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, + uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst); } + } + + if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP) + { + m_regIntraRDListWithCosts.push_back( ModeInfoWithCost( cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost ) ); + } + + if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ) + { csTemp->cost = MAX_DOUBLE; csTemp->costDbOffset = 0; + tmpValidReturn = false; } + validReturn |= tmpValidReturn; + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 ) + { + m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ?? + } + DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x, + cu.blocks[0].y, (int) width, (int) height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod, + pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag); - DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode ); - - // check r-d cost - if( csTemp->cost < csBest->cost ) + if( tmpValidReturn ) { - std::swap( csTemp, csBest ); - - uiBestPUMode = uiOrgMode; - bestExtendRef = multiRefIdx; - bestIspOption = cu.ispMode; - if( csBest->cost < bestCurrentCost ) + if (isFirstColorSpace) + { + if (m_pcEncCfg->getRGBFormatFlag() || !cu.ispMode) + { + sortRdModeListFirstColorSpace(uiOrgMode, csTemp->cost, cu.bdpcmMode, m_savedRdModeFirstColorSpace[m_savedRdModeIdx], m_savedRdCostFirstColorSpace[m_savedRdModeIdx], m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx], m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]); + } + } + // check r-d cost + if( csTemp->cost < csBest->cost ) { - bestCurrentCost = csBest->cost; + std::swap( csTemp, csBest ); + + uiBestPUMode = uiOrgMode; + bestBDPCMMode = cu.bdpcmMode; + if( sps.getUseLFNST() && mtsUsageFlag == 1 
&& !cu.ispMode ) + { + m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost; + m_bestModeCostValid[ lfnstIdx ] = true; + } + if( csBest->cost < bestCurrentCost ) + { + bestCurrentCost = csBest->cost; + } + if ( cu.ispMode ) + { + m_modeCtrl->setIspCost(csBest->cost); + bestLfnstIdx = cu.lfnstIdx; + } + else if ( testISP ) + { + m_modeCtrl->setMtsFirstPassNoIspCost(csBest->cost); + } } - if( !cu.ispMode ) + if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM ) { - bestNormalIntraModeIndex = uiMode; + bestCostNonBDPCM = csBest->cost; } } csTemp->releaseIntermediateData(); + if( m_pcEncCfg->getFastLocalDualTreeMode() ) + { + if( cu.isConsIntra() && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0 ) + { + if( m_pcEncCfg->getFastLocalDualTreeMode() == 2 ) + { + //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%) + break; + } + else + { + if( csBest->cost > costInterCU * 1.5 ) + { + break; + } + } + } + } + if (sps.getUseColorTrans() && !CS::isDualITree(cs)) + { + if ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) && csBest->cost != MAX_DOUBLE && bestCS->cost != MAX_DOUBLE && mode >= 0) + { + if (csBest->cost > bestCS->cost) + { + break; + } + } + } } // Mode loop - cu.ispMode = bestIspOption; + cu.ispMode = uiBestPUMode.ispMod; + cu.lfnstIdx = bestLfnstIdx; - cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, keepResi, keepResi); + if( validReturn ) + { + if (cu.colorTransform) + { + cs.useSubStructure(*csBest, partitioner.chType, pu, true, true, keepResi, keepResi); + } + else + { + cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, keepResi, + keepResi); + } + } csBest->releaseIntermediateData(); - //=== update PU data ==== - pu.intraDir[0] = uiBestPUMode; - pu.multiRefIdx = bestExtendRef; + if( validReturn ) + { + //=== update PU data ==== + cu.mipFlag = 
uiBestPUMode.mipFlg; + pu.mipTransposedFlag = uiBestPUMode.mipTrFlg; + pu.multiRefIdx = uiBestPUMode.mRefId; + pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId; + cu.bdpcmMode = bestBDPCMMode; + if (cu.colorTransform) + { + CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform"); + } + } } //===== reset context models ===== m_CABACEstimator->getCtx() = ctxStart; + + return validReturn; } void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed ) @@ -878,7 +1232,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner cs.setDecomp( cs.area.Cb(), false ); double bestCostSoFar = maxCostAllowed; - bool lumaUsesISP = !CS::isDualITree( *cu.cs ) && cu.ispMode; + bool lumaUsesISP = !cu.isSepTree() && cu.ispMode; PartSplit ispType = lumaUsesISP ? CU::getISPType( cu, COMPONENT_Y ) : TU_NO_ISP; CHECK( cu.ispMode && bestCostSoFar < 0, "bestCostSoFar must be positive!" 
); @@ -888,12 +1242,12 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner uint32_t uiBestMode = 0; Distortion uiBestDist = 0; double dBestCost = MAX_DOUBLE; + int32_t bestBDPCMMode = 0; //----- init mode list ---- { - uint32_t uiMinMode = 0; - uint32_t uiMaxMode = NUM_CHROMA_MODE; - + int32_t uiMinMode = 0; + int32_t uiMaxMode = NUM_CHROMA_MODE; //----- check chroma modes ----- uint32_t chromaCandModes[ NUM_CHROMA_MODE ]; PU::getIntraChromaCandModes( pu, chromaCandModes ); @@ -905,13 +1259,13 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner saveCS.area.repositionTo( cs.area ); saveCS.clearTUs(); - if( !CS::isDualITree( cs ) && cu.ispMode ) + if( !cu.isSepTree() && cu.ispMode ) { saveCS.clearCUs(); saveCS.clearPUs(); } - if( CS::isDualITree( cs ) ) + if( cu.isSepTree() ) { if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { @@ -966,9 +1320,8 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner { modeIsEnable[i] = 1; } - - DistParam distParam; - const bool useHadamard = true; + DistParam distParamSad; + DistParam distParamSatd; pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation. initIntraPatternChType(cu, pu.Cb()); @@ -990,42 +1343,50 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner pu.intraDir[1] = mode; // temporary assigned, for SATD checking. 
int64_t sad = 0; + int64_t sadCb = 0; + int64_t satdCb = 0; + int64_t sadCr = 0; + int64_t satdCr = 0; CodingStructure& cs = *(pu.cs); CompArea areaCb = pu.Cb(); PelBuf orgCb = cs.getOrgBuf(areaCb); PelBuf predCb = cs.getPredBuf(areaCb); - - m_pcRdCost->setDistParam(distParam, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, useHadamard); - distParam.applyWeight = false; - + m_pcRdCost->setDistParam(distParamSad, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, false); + m_pcRdCost->setDistParam(distParamSatd, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, true); + distParamSad.applyWeight = false; + distParamSatd.applyWeight = false; if (PU::isLMCMode(mode)) { predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode); } else { - predIntraAng(COMPONENT_Cb, predCb, pu, false); + initPredIntraParams(pu, pu.Cb(), *pu.cs->sps); + predIntraAng(COMPONENT_Cb, predCb, pu); } - - sad += distParam.distFunc(distParam); - + sadCb = distParamSad.distFunc(distParamSad) * 2; + satdCb = distParamSatd.distFunc(distParamSatd); + sad += std::min(sadCb, satdCb); CompArea areaCr = pu.Cr(); PelBuf orgCr = cs.getOrgBuf(areaCr); PelBuf predCr = cs.getPredBuf(areaCr); - - m_pcRdCost->setDistParam(distParam, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, useHadamard); - distParam.applyWeight = false; - + m_pcRdCost->setDistParam(distParamSad, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, false); + m_pcRdCost->setDistParam(distParamSatd, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, true); + distParamSad.applyWeight = false; + distParamSatd.applyWeight = false; if (PU::isLMCMode(mode)) { predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode); } else { - predIntraAng(COMPONENT_Cr, predCr, pu, false); + initPredIntraParams(pu, pu.Cr(), *pu.cs->sps); + predIntraAng(COMPONENT_Cr, predCr, pu); } - sad += 
distParam.distFunc(distParam); + sadCr = distParamSad.distFunc(distParamSad) * 2; + satdCr = distParamSatd.distFunc(distParamSatd); + sad += std::min(sadCr, satdCr); satdSortedCost[idx] = sad; } // sort the mode based on the cost from small to large. @@ -1056,10 +1417,20 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner // save the dist Distortion baseDist = cs.dist; - - for (uint32_t uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++) + bool testBDPCM = true; + testBDPCM = testBDPCM && CU::bdpcmAllowed(cu, COMPONENT_Cb) && cu.ispMode == 0 && cu.mtsFlag == 0 && cu.lfnstIdx == 0; + for (int32_t uiMode = uiMinMode - (2 * int(testBDPCM)); uiMode < uiMaxMode; uiMode++) { - const int chromaIntraMode = chromaCandModes[uiMode]; + int chromaIntraMode = chromaCandModes[uiMode]; + + if (uiMode < 0) + { + cu.bdpcmModeChroma = -uiMode; + chromaIntraMode = chromaCandModes[0]; + } + else + { + cu.bdpcmModeChroma = 0; if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) ) { continue; @@ -1068,6 +1439,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner { continue; } + } cs.setDecomp( pu.Cb(), false ); cs.dist = baseDist; //----- restore context models ----- @@ -1082,7 +1454,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner continue; } - if (cs.pps->getUseTransformSkip()) + if (cs.sps->getTransformSkipEnabledFlag()) { m_CABACEstimator->getCtx() = ctxStart; } @@ -1120,6 +1492,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner dBestCost = dCost; uiBestDist = uiDist; uiBestMode = chromaIntraMode; + bestBDPCMMode = cu.bdpcmModeChroma; } } @@ -1146,6 +1519,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner pu.intraDir[1] = uiBestMode; cs.dist = uiBestDist; + cu.bdpcmModeChroma = bestBDPCMMode; } //----- restore context models ----- @@ -1156,1184 +1530,4127 @@ void 
IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner } } -void IntraSearch::IPCMSearch(CodingStructure &cs, Partitioner& partitioner) + +void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost ) { - ComponentID compStr = (CS::isDualITree(cs) && !isLuma(partitioner.chType)) ? COMPONENT_Cb: COMPONENT_Y; - ComponentID compEnd = (CS::isDualITree(cs) && isLuma(partitioner.chType)) ? COMPONENT_Y : COMPONENT_Cr; - for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) ) + if( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE ) { - - xEncPCM(cs, partitioner, compID); + m_cuAreaInSCIPU[m_numCuInSCIPU] = area; + m_cuCostInSCIPU[m_numCuInSCIPU] = cost; + m_numCuInSCIPU++; } - - cs.getPredBuf().fill(0); - cs.getResiBuf().fill(0); - cs.getOrgResiBuf().fill(0); - - cs.dist = 0; - cs.fracBits = 0; - cs.cost = 0; - - cs.setDecomp(cs.area); - cs.picture->getPredBuf(cs.area).copyFrom(cs.getPredBuf()); } -void IntraSearch::xEncPCM(CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID) +void IntraSearch::initCuAreaCostInSCIPU() { - TransformUnit &tu = *cs.getTU( partitioner.chType ); - - const int channelBitDepth = cs.sps->getBitDepth(toChannelType(compID)); - const uint32_t uiPCMBitDepth = cs.sps->getPCMBitDepth(toChannelType(compID)); - - const int pcmShiftRight = (channelBitDepth - int(uiPCMBitDepth)); - - CompArea area = tu.blocks[compID]; - PelBuf pcmBuf = tu.getPcmbuf (compID); - PelBuf recBuf = cs.getRecoBuf ( area ); - CPelBuf orgBuf = cs.getOrgBuf ( area ); - - CHECK(pcmShiftRight < 0, "Negative shift"); - CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); - PelBuf tempOrgBuf = m_tmpStorageLCU.getBuf(tmpArea); - tempOrgBuf.copyFrom(orgBuf); - if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) + for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ ) { - tempOrgBuf.rspSignal(m_pcReshape->getFwdLUT()); - } - for (uint32_t uiY = 0; 
uiY < pcmBuf.height; uiY++) - { - for (uint32_t uiX = 0; uiX < pcmBuf.width; uiX++) - { - // Encode - pcmBuf.at(uiX, uiY) = tempOrgBuf.at(uiX, uiY) >> pcmShiftRight; - // Reconstruction - recBuf.at(uiX, uiY) = pcmBuf.at(uiX, uiY) << pcmShiftRight; - } + m_cuAreaInSCIPU[i] = Area(); + m_cuCostInSCIPU[i] = 0; } + m_numCuInSCIPU = 0; } - -// ------------------------------------------------------------------------------------------------------------------- -// Intra search -// ------------------------------------------------------------------------------------------------------------------- - -void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx ) +void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp) { - CodingUnit &cu = *cs.getCU( partitioner.chType ); + CodingUnit &cu = *cs.getCU(partitioner.chType); + TransformUnit &tu = *cs.getTU(partitioner.chType); + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; - if (bLuma) + if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) { - bool isFirst = cu.ispMode ? 
subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos(); - - // CU header - if( isFirst ) + cs.getPredBuf().copyFrom(cs.getOrgBuf()); + cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT()); + } + cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin]; + //derive palette + derivePLTLossy(cs, partitioner, compBegin, numComp); + reorderPLT(cs, partitioner, compBegin, numComp); + + preCalcPLTIndexRD(cs, partitioner, compBegin, numComp); // Pre-calculate distortions for each pixel + double rdCost = MAX_DOUBLE; + deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, rdCost); // Optimize palette index map (horizontal scan) + if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1) + { + deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, rdCost); // Optimize palette index map (vertical scan) + } + cu.useRotation[compBegin] = m_bestScanRotationMode; + int indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; + if (indexMaxSize <= 1) + { + cu.useRotation[compBegin] = false; + } + //reconstruct pixel + PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin); + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) { - if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) - && cu.Y().valid() - ) + if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin]) { - if( cs.pps->getTransquantBypassEnabledFlag() ) - { - m_CABACEstimator->cu_transquant_bypass_flag( cu ); - } - m_CABACEstimator->cu_skip_flag( cu ); - m_CABACEstimator->pred_mode ( cu ); + calcPixelPred(cs, partitioner, y, x, compBegin, numComp); } - if( CU::isIntra(cu) ) + else { - m_CABACEstimator->pcm_data( cu, partitioner ); - if( cu.ipcm ) + for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++) { - return; + CompArea area = cu.blocks[compID]; + PelBuf recBuf = cs.getRecoBuf(area); + uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, 
cs.sps->getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + if (compBegin != COMPONENT_Y || compID == COMPONENT_Y) + { + recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)]; + } + else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0) + { + recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)]; + } } } - m_CABACEstimator->extend_ref_line(cu); - m_CABACEstimator->isp_mode ( cu ); - } - - PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType); - - // luma prediction mode - if (isFirst) - { - if ( !cu.Y().valid()) - m_CABACEstimator->pred_mode( cu ); - m_CABACEstimator->intra_luma_pred_mode( pu ); } } - if (bChroma) - { - bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos(); - - PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA ); + cs.getPredBuf().fill(0); + cs.getResiBuf().fill(0); + cs.getOrgResiBuf().fill(0); - if( isFirst ) + cs.fracBits = MAX_UINT; + cs.cost = MAX_DOUBLE; + Distortion distortion = 0; + for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++) + { + const ComponentID compID = ComponentID(comp); + CPelBuf reco = cs.getRecoBuf(compID); + CPelBuf org = cs.getOrgBuf(compID); +#if WCG_EXT + if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || ( + m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))) { - m_CABACEstimator->intra_chroma_pred_mode( pu ); + const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]); + + if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) + { + const CompArea &areaY = cu.Y(); + CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size()); + PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); + tmpRecLuma.copyFrom(reco); + 
tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); + distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + else + { + distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } } + else +#endif + distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE); } -} -void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType ) + cs.dist += distortion; + const CompArea &area = cu.blocks[compBegin]; + cs.setDecomp(area); + cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area)); +} +void IntraSearch::calcPixelPredRD(CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* paPixelValue, Pel* paRecoValue, ComponentID compBegin, uint32_t numComp) { - const UnitArea &currArea = partitioner.currArea(); - int subTuCounter = subTuIdx; - TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter ); - CodingUnit &currCU = *currTU.cu; - uint32_t currDepth = partitioner.currTrDepth; - - const bool subdiv = currTU.depth > currDepth; - ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? 
COMPONENT_Y : COMPONENT_Cb; - const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv; - - if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) - { - CHECK( !subdiv, "TU split implied" ); - } - else + CodingUnit &cu = *cs.getCU(partitioner.chType); + TransformUnit &tu = *cs.getTU(partitioner.chType); + + int qp[3]; + int qpRem[3]; + int qpPer[3]; + int quantiserScale[3]; + int quantiserRightShift[3]; + int rightShiftOffset[3]; + int invquantiserRightShift[3]; + int add[3]; + for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++) { - CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" ); + QpParam cQP(tu, ComponentID(ch)); + qp[ch] = cQP.Qp(true); + qpRem[ch] = qp[ch] % 6; + qpPer[ch] = qp[ch] / 6; + quantiserScale[ch] = g_quantScales[0][qpRem[ch]]; + quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch]; + rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1); + invquantiserRightShift[ch] = IQUANT_SHIFT; + add[ch] = 1 << (invquantiserRightShift[ch] - 1); } - if( bChroma && ( !currCU.ispMode || chromaCbfISP ) ) + for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++) { - const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat); - const uint32_t cbfDepth = ( chromaCbfISP ? currDepth - 1 : currDepth ); - - for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++) - { - const ComponentID compID = ComponentID(ch); - - if( currDepth == 0 || TU::getCbfAtDepth( currTU, compID, currDepth - 1 ) || chromaCbfISP ) - { - const bool prevCbf = ( compID == COMPONENT_Cr ? 
TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) : false ); - m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, currDepth ), currArea.blocks[compID], cbfDepth, prevCbf ); - - } - } + const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch)); + paPixelValue[ch] = Pel(std::max<int>(0, ((orgBuf[ch] * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]))); + assert(paPixelValue[ch] < (1 << (channelBitDepth + 1))); + paRecoValue[ch] = (((paPixelValue[ch] * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch]; + paRecoValue[ch] = Pel(ClipBD<int>(paRecoValue[ch], channelBitDepth));//to be checked } +} - if (subdiv) - { +void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp) +{ + CodingUnit &cu = *cs.getCU(partitioner.chType); + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; - if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) - { - partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); - } - else if( currCU.ispMode && isLuma( compID ) ) + CPelBuf orgBuf[3]; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + CompArea area = cu.blocks[comp]; + if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) { - partitioner.splitCurrArea( ispType, cs ); + orgBuf[comp] = cs.getPredBuf(area); } else - THROW( "Cannot perform an implicit split!" ); - - do { - xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType ); - subTuCounter += subTuCounter != -1 ? 
1 : 0; - } while( partitioner.nextPart( cs ) ); - - partitioner.exitCurrSplit(); + orgBuf[comp] = cs.getOrgBuf(area); + } } - else + + int rasPos; + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + for (uint32_t y = 0; y < height; y++) { - //===== Cbfs ===== - if (bLuma) + for (uint32_t x = 0; x < width; x++) { - bool previousCbf = false; - bool lastCbfIsInferred = false; - if( ispType != TU_NO_ISP ) + rasPos = y * width + x;; + // chroma discard + bool discardChroma = (compBegin == COMPONENT_Y) && (y&scaleY || x&scaleX); + Pel curPel[3]; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) { - bool rootCbfSoFar = false; - uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> g_aucLog2[currTU.lheight()] : currCU.lwidth() >> g_aucLog2[currTU.lwidth()]; - if( subTuCounter == nTus - 1 ) + uint32_t pX1 = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x; + uint32_t pY1 = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y; + curPel[comp] = orgBuf[comp].at(pX1, pY1); + } + + uint8_t pltIdx = 0; + double minError = MAX_DOUBLE; + uint8_t bestIdx = 0; + while (pltIdx < cu.curPLTSize[compBegin]) + { + uint64_t sqrtError = 0; + for (int comp = compBegin; comp < (discardChroma ? 
1 : (compBegin + numComp)); comp++) { - TransformUnit* tuPointer = currCU.firstTU; - for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) + int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]); + if (isChroma((ComponentID)comp)) { - rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth ); - tuPointer = tuPointer->next; + sqrtError += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING); } - if( !rootCbfSoFar ) + else { - lastCbfIsInferred = true; + sqrtError += tmpErr*tmpErr; } } - if( !lastCbfIsInferred ) + m_indexError[pltIdx][rasPos] = (double)sqrtError; + if (sqrtError < minError) { - previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth ); + minError = (double)sqrtError; + bestIdx = pltIdx; } + pltIdx++; } - if( !lastCbfIsInferred ) + + Pel paPixelValue[3], paRecoValue[3]; + calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp); + uint64_t error = 0, rate = 0; + for (int comp = compBegin; comp < (discardChroma ? 
1 : (compBegin + numComp)); comp++) { - m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode ); + int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]); + if (isChroma((ComponentID)comp)) + { + error += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING); + } + else + { + error += tmpErr*tmpErr; + } + rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color } + double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate; + m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost; + if (rdCost < minError) + { + minError = rdCost; + bestIdx = (uint8_t)cu.curPLTSize[compBegin]; + } + m_minErrorIndexMap[rasPos] = bestIdx; // save the optimal index of the current pixel } } } -void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType ) +void IntraSearch::deriveIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dMinCost) { - const UnitArea &currArea = partitioner.currArea(); - - int subTuCounter = subTuIdx; - TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx ); - uint32_t currDepth = partitioner.currTrDepth; - const bool subdiv = currTU.depth > currDepth; + CodingUnit &cu = *cs.getCU(partitioner.chType); + TransformUnit &tu = *cs.getTU(partitioner.chType); + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; + + int total = height*width; + Pel *runIndex = tu.getPLTIndex(compBegin); + bool *runType = tu.getRunTypes(compBegin); + m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? 
SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)]; +// Trellis initialization + for (int i = 0; i < 2; i++) + { + memset(m_prevRunTypeRDOQ[i], 0, sizeof(Pel)*NUM_TRELLIS_STATE); + memset(m_prevRunPosRDOQ[i], 0, sizeof(int)*NUM_TRELLIS_STATE); + memset(m_stateCostRDOQ[i], 0, sizeof (double)*NUM_TRELLIS_STATE); + } + for (int state = 0; state < NUM_TRELLIS_STATE; state++) + { + m_statePtRDOQ[state][0] = 0; + } +// Context modeling + const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess(); + BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1]; + for (int dist = 0; dist <= RUN_IDX_THRE; dist++) + { + const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist); + fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) ); + } + BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1]; + for (int dist = 0; dist <= RUN_IDX_THRE; dist++) + { + const unsigned ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist); + fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) ); + } + const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() ); - if (subdiv) +// Trellis RDO per CG + bool contTrellisRD = true; + for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++) { - if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) - { - partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); - } - else if( currTU.cu->ispMode ) + int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE; + int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE); + maxSubPos = (maxSubPos > total) ? 
total : maxSubPos; // if last position is out of the current CU size + contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode); + } + if (!contTrellisRD) + { + return; + } + + +// best state at the last scan position + double sumRdCost = MAX_DOUBLE; + uint8_t bestState = 0; + for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++) + { + if (m_stateCostRDOQ[0][state] < sumRdCost) { - partitioner.splitCurrArea( ispType, cs ); + sumRdCost = m_stateCostRDOQ[0][state]; + bestState = state; } - else - THROW("Implicit TU split not available!"); + } - do + bool checkRunTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT]; + uint8_t nextState = bestState; +// best trellis path + for (int i = (width*height - 1); i >= 0; i--) + { + bestStateTable[i] = nextState; + int rasterPos = m_scanOrder[i].idx; + nextState = m_statePtRDOQ[nextState][rasterPos]; + } +// reconstruct index and runs based on the state pointers + for (int i = 0; i < (width*height); i++) + { + int rasterPos = m_scanOrder[i].idx; + int abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1; + nextState = bestStateTable[i]; + if ( nextState == 0 ) // same as the previous { - xEncCoeffQT( cs, partitioner, compID, subTuCounter, ispType ); - subTuCounter += subTuCounter != -1 ? 
1 : 0; - } while( partitioner.nextPart( cs ) ); - - partitioner.exitCurrSplit(); + checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ]; + if ( checkRunTable[rasterPos] == PLT_RUN_INDEX ) + { + checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx]; + } + else + { + checkIndexTable[rasterPos] = checkIndexTable[ abovePos ]; + } + } + else if (nextState == 1) // CopyAbove mode + { + checkRunTable[rasterPos] = PLT_RUN_COPY; + checkIndexTable[rasterPos] = checkIndexTable[abovePos]; + } + else if (nextState == 2) // Index mode + { + checkRunTable[rasterPos] = PLT_RUN_INDEX; + checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos]; + } } - else - if( currArea.blocks[compID].valid() ) +// Escape flag + m_bestEscape = false; + for (int pos = 0; pos < (width*height); pos++) { - if( TU::hasCrossCompPredInfo( currTU, compID ) ) + uint8_t index = checkIndexTable[pos]; + if (index == cu.curPLTSize[compBegin]) { - m_CABACEstimator->cross_comp_pred( currTU, compID ); + m_bestEscape = true; + break; } - if( TU::getCbf( currTU, compID ) ) + } + +// Horizontal scan v.s vertical scan + if (sumRdCost < dMinCost) + { + cu.useEscape[compBegin] = m_bestEscape; + m_bestScanRotationMode = pltScanMode; + for (int pos = 0; pos < (width*height); pos++) { - m_CABACEstimator->residual_coding( currTU, compID ); + runIndex[pos] = checkIndexTable[pos]; + runType[pos] = checkRunTable[pos]; } + dMinCost = sumRdCost; } } -uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType ) +bool IntraSearch::deriveSubblockIndexMap( + CodingStructure& cs, + Partitioner& partitioner, + ComponentID compBegin, + PLTScanMode pltScanMode, + int minSubPos, + int maxSubPos, + const BinFracBits& fracBitsPltRunType, + const BinFracBits* fracBitsPltIndexINDEX, + const BinFracBits* fracBitsPltIndexCOPY, + const double minCost, + bool useRotate +) { - 
m_CABACEstimator->resetBits(); + CodingUnit &cu = *cs.getCU(partitioner.chType); + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; + int indexMaxValue = cu.curPLTSize[compBegin]; + + int refId = 0; + int currRasterPos, currScanPos, prevScanPos, aboveScanPos, roffset; + int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width): floorLog2(height); + int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2*width: 2*height; + for (int curPos = minSubPos; curPos < maxSubPos; curPos++) + { + currRasterPos = m_scanOrder[curPos].idx; + prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize; + roffset = (curPos >> log2Width) << log2Width; + aboveScanPos = roffset - (curPos - roffset + 1); + aboveScanPos %= buffersize; + currScanPos = curPos % buffersize; + if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height)) + { + aboveScanPos = -1; // first column/row: above row is not valid + } - xEncIntraHeader( cs, partitioner, bLuma, bChroma, subTuIdx ); - xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuIdx, ispType ); +// Trellis stats: +// 1st state: same as previous scanned sample +// 2nd state: Copy_Above mode +// 3rd state: Index mode +// Loop of current state + for ( int curState = 0; curState < NUM_TRELLIS_STATE; curState++ ) + { + double minRdCost = MAX_DOUBLE; + int minState = 0; // best prevState + uint8_t bestRunIndex = 0; + bool bestRunType = 0; + bool bestPrevCodedType = 0; + int bestPrevCodedPos = 0; + if ( ( curState == 0 && curPos == 0 ) || ( curState == 1 && aboveScanPos < 0 ) ) // state not available + { + m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE; + continue; + } + bool runType = 0; + uint8_t runIndex = 0; + if ( curState == 1 ) // 2nd state: Copy_Above mode + { + runType = PLT_RUN_COPY; + } + else if ( curState == 2 ) // 3rd state: Index mode + { + runType = PLT_RUN_INDEX; + runIndex = m_minErrorIndexMap[currRasterPos]; + } - if( bLuma 
) - { - xEncCoeffQT( cs, partitioner, COMPONENT_Y, subTuIdx, ispType ); - } - if( bChroma ) - { - xEncCoeffQT( cs, partitioner, COMPONENT_Cb, subTuIdx, ispType ); - xEncCoeffQT( cs, partitioner, COMPONENT_Cr, subTuIdx, ispType ); - } +// Loop of previous state + for ( int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++ ) + { + if ( m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE ) + { + continue; + } + if ( curState == 0 ) // 1st state: same as previous scanned sample + { + runType = m_runMapRDOQ[refId][stateID][prevScanPos]; + runIndex = ( runType == PLT_RUN_INDEX ) ? m_indexMapRDOQ[refId][stateID][ prevScanPos ] : m_indexMapRDOQ[refId][stateID][ aboveScanPos ]; + } + else if ( curState == 1 ) // 2nd state: Copy_Above mode + { + runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos]; + } + bool prevRunType = m_runMapRDOQ[refId][stateID][prevScanPos]; + uint8_t prevRunIndex = m_indexMapRDOQ[refId][stateID][prevScanPos]; + uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0; + int dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1; + double rdCost = m_stateCostRDOQ[refId][stateID]; + if ( rdCost >= minRdCost ) continue; + +// Calculate Rd cost + bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID]; + int prevCodedPos = m_prevRunPosRDOQ [refId][stateID]; + const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY; + rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? 
width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType); + if (rdCost < minRdCost) // update minState ( minRdCost ) + { + minRdCost = rdCost; + minState = stateID; + bestRunType = runType; + bestRunIndex = runIndex; + bestPrevCodedType = prevCodedRunType; + bestPrevCodedPos = prevCodedPos; + } + } +// Update trellis info of current state + m_stateCostRDOQ [1 - refId][curState] = minRdCost; + m_prevRunTypeRDOQ[1 - refId][curState] = bestPrevCodedType; + m_prevRunPosRDOQ [1 - refId][curState] = bestPrevCodedPos; + m_statePtRDOQ[curState][currRasterPos] = minState; + int buffer2update = std::min(buffersize, curPos); + memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t)*buffer2update); + memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool)*buffer2update); + m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex; + m_runMapRDOQ [1 - refId][curState][currScanPos] = bestRunType; + } - uint64_t fracBits = m_CABACEstimator->getEstFracBits(); - return fracBits; + if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan + { + if ((m_stateCostRDOQ[1 - refId][0] >= minCost) && + (m_stateCostRDOQ[1 - refId][1] >= minCost) && + (m_stateCostRDOQ[1 - refId][2] >= minCost) ) + { + return 0; + } + } + refId = 1 - refId; + } + return 1; } -uint64_t IntraSearch::xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID ) +double IntraSearch::rateDistOptPLT( + bool runType, + uint8_t runIndex, + bool prevRunType, + uint8_t prevRunIndex, + uint8_t aboveRunIndex, + bool& prevCodedRunType, + int& prevCodedPos, + int scanPos, + uint32_t width, + int dist, + int indexMaxValue, + const BinFracBits* IndexfracBits, + const BinFracBits& TypefracBits) { - m_CABACEstimator->resetBits(); - - if( compID == COMPONENT_Cb ) - { - //intra mode coding - PredictionUnit &pu = *cs.getPU( partitioner.currArea().lumaPos(), partitioner.chType 
); - m_CABACEstimator->intra_chroma_pred_mode( pu ); - //xEncIntraHeader(cs, partitioner, false, true); - } - CHECK( partitioner.currTrDepth != 1, "error in the depth!" ); - const UnitArea &currArea = partitioner.currArea(); - - TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType ); + double rdCost = 0.0; + bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) ); - //cbf coding - m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, partitioner.currTrDepth ), currArea.blocks[compID], partitioner.currTrDepth - 1 ); - //coeffs coding and cross comp coding - if( TU::hasCrossCompPredInfo( currTU, compID ) ) + if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value { - m_CABACEstimator->cross_comp_pred( currTU, compID ); + uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex; + refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex; + if ( runIndex == refIndex ) + { + rdCost = MAX_DOUBLE; + return rdCost; + } + rdCost += m_pcRdCost->getLambda()*m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue]; } - if( TU::getCbf( currTU, compID ) ) + rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx]; + if (scanPos > 0) { - m_CABACEstimator->residual_coding( currTU, compID ); + rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1] >> SCALE_BITS) : (IndexfracBits[(dist < RUN_IDX_THRE) ? 
dist : RUN_IDX_THRE].intBits[0] >> SCALE_BITS)); } - - uint64_t fracBits = m_CABACEstimator->getEstFracBits(); - return fracBits; -} - -uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const ComponentID &compID) -{ - m_CABACEstimator->resetBits(); - - if( TU::hasCrossCompPredInfo( currTU, compID ) ) + if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY ) { - m_CABACEstimator->cross_comp_pred( currTU, compID ); + rdCost += m_pcRdCost->getLambda()*(TypefracBits.intBits[runType] >> SCALE_BITS); } - if( TU::getCbf( currTU, compID ) ) + if (!identityFlag || scanPos == 0) { - m_CABACEstimator->residual_coding( currTU, compID ); + prevCodedRunType = runType; + prevCodedPos = scanPos; } - - uint64_t fracBits = m_CABACEstimator->getEstFracBits(); - return fracBits; + return rdCost; } - -void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr) +uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count) { - if (!tu.blocks[compID].valid()) + uint32_t numBins = 0; + while (symbol >= (uint32_t)(1 << count)) { - return; + numBins++; + symbol -= 1 << count; + count++; } + numBins++; + numBins += count; + assert(numBins <= 32); + return numBins; +} - CodingStructure &cs = *tu.cs; - - const CompArea &area = tu.blocks[compID]; - const SPS &sps = *cs.sps; - const PPS &pps = *cs.pps; - - const ChannelType chType = toChannelType(compID); - const int bitDepth = sps.getBitDepth(chType); - - PelBuf piOrg = cs.getOrgBuf (area); - PelBuf piPred = cs.getPredBuf (area); - PelBuf piResi = cs.getResiBuf (area); - PelBuf piOrgResi = cs.getOrgResiBuf(area); - PelBuf piReco = cs.getRecoBuf (area); - - const PredictionUnit &pu = *cs.getPU(area.pos(), chType); - const uint32_t uiChFinalMode = PU::getFinalIntraMode(pu, chType); - - const bool bUseCrossCPrediction = 
pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma( compID ) && PU::isChromaIntraModeCrossCheckMode( pu ) && checkCrossCPrediction; - const bool ccUseRecoResi = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate(); - const bool ispSplitIsAllowed = CU::canUseISPSplit( *tu.cu, compID ); - - - //===== init availability pattern ===== - PelBuf sharedPredTS( m_pSharedPredTransformSkip[compID], area ); - if( default0Save1Load2 != 2 ) +uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol) +{ + uint32_t idxCodeBit = 0; + uint32_t thresh; + if (maxSymbol > 256) { - const bool bUseFilteredPredictions = IntraPrediction::useFilteredIntraRefSamples( compID, pu, true, tu ); - initIntraPatternChType( *tu.cu, area, bUseFilteredPredictions ); - - //===== get prediction signal ===== - if( compID != COMPONENT_Y && PU::isLMCMode( uiChFinalMode ) ) - { - { - xGetLumaRecPixels( pu, area ); - } - predIntraChromaLM( compID, piPred, pu, area, uiChFinalMode ); - } - else + uint32_t threshVal = 1 << 8; + thresh = 8; + while (threshVal <= maxSymbol) { - predIntraAng( compID, piPred, pu, bUseFilteredPredictions ); - } - - - // save prediction - if( default0Save1Load2 == 1 ) - { - sharedPredTS.copyFrom( piPred ); + thresh++; + threshVal <<= 1; } + thresh--; } else { - // load prediction - piPred.copyFrom( sharedPredTS ); + thresh = g_tbMax[maxSymbol]; } - - - DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), uiChFinalMode ); - //DTRACE_PEL_BUF( D_PRED, piPred, tu, tu.cu->predMode, COMPONENT_Y ); - - const Slice &slice = *cs.slice; - bool flag = slice.getReshapeInfo().getUseSliceReshaper() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); - if (flag && slice.getReshapeInfo().getSliceReshapeChromaAdj() && isChroma(compID)) + uint32_t uiVal = 1 << thresh; + assert(uiVal <= maxSymbol); + assert((uiVal << 1) > maxSymbol); + assert(symbol < maxSymbol); + uint32_t b = 
maxSymbol - uiVal; + assert(b < uiVal); + if (symbol < uiVal - b) { - const Area area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size())); - const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area ); - PelBuf piPredY; - piPredY = cs.picture->getPredBuf(areaY); - const Pel avgLuma = piPredY.computeAvg(); - int adj = m_pcReshape->calculateChromaAdj(avgLuma); - tu.setChromaAdj(adj); + idxCodeBit = thresh; } - //===== get residual signal ===== - piResi.copyFrom( piOrg ); - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID==COMPONENT_Y) + else { - CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); - PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); - tmpPred.copyFrom(piPred); - piResi.rspSignal(m_pcReshape->getFwdLUT()); - piResi.subtract(tmpPred); + idxCodeBit = thresh + 1; } - else - piResi.subtract( piPred ); + return idxCodeBit; +} - if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isLuma(compID)) +void IntraSearch::initTBCTable(int bitDepth) +{ + for (uint32_t i = 0; i < m_symbolSize; i++) { - piOrgResi.copyFrom (piResi); + memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1)); } - - if (bUseCrossCPrediction) + for (uint32_t i = 0; i < (m_symbolSize + 1); i++) { - if (xCalcCrossComponentPredictionAlpha(tu, compID, ccUseRecoResi) == 0) + for (uint32_t j = 0; j < i; j++) { - return; + m_truncBinBits[j][i] = getTruncBinBits(j, i); } - CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, false); } - - //===== transform and quantization ===== - //--- init rate estimation arrays for RDOQ --- - //--- transform and quantization --- - TCoeff uiAbsSum = 0; - - const QpParam cQP(tu, compID); - -#if RDOQ_CHROMA_LAMBDA - m_pcTrQuant->selectLambda(compID); 
-#endif - - flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); - if (flag && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() ) + memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize); + for (uint32_t i = 0; i < m_symbolSize; i++) { - int cResScaleInv = tu.getChromaAdj(); - double cResScale = round((double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv); - m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale)); - piResi.scaleSignal(cResScaleInv, 1, tu.cu->cs->slice->clpRng(compID)); + m_escapeNumBins[i] = getEpExGolombNumBins(i, 3); } +} +void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp) +{ + CodingUnit &cu = *cs.getCU(partitioner.chType); + TransformUnit &tu = *cs.getTU(partitioner.chType); - double diagRatio = 0, horVerRatio = 0; - - if( trModes ) + CPelBuf orgBuf[3]; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) { - m_pcTrQuant->transformNxN( tu, compID, cQP, trModes, CU::isIntra( *tu.cu ) ? m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand(), ispSplitIsAllowed ? &diagRatio : nullptr, ispSplitIsAllowed ? 
&horVerRatio : nullptr ); - tu.mtsIdx = trModes->at(0).first; + CompArea area = cu.blocks[comp]; + if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) + { + orgBuf[comp] = cs.getPredBuf(area); + } + else + { + orgBuf[comp] = cs.getOrgBuf(area); + } } - m_pcTrQuant->transformNxN( tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr, &diagRatio, &horVerRatio ); - if (!tu.cu->ispMode && isLuma(compID) && ispSplitIsAllowed && - tu.mtsIdx == 0 - ) + + int qp[3]; + int qpRem[3]; + int qpPer[3]; + int quantiserScale[3]; + int quantiserRightShift[3]; + int rightShiftOffset[3]; + int invquantiserRightShift[3]; + int add[3]; + for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++) { - m_intraModeDiagRatio .push_back(diagRatio); - m_intraModeHorVerRatio .push_back(horVerRatio); - m_intraModeTestedNormalIntra.push_back((int)uiChFinalMode); + QpParam cQP(tu, ComponentID(ch)); + qp[ch] = cQP.Qp(true); + qpRem[ch] = qp[ch] % 6; + qpPer[ch] = qp[ch] / 6; + quantiserScale[ch] = g_quantScales[0][qpRem[ch]]; + quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch]; + rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1); + invquantiserRightShift[ch] = IQUANT_SHIFT; + add[ch] = 1 << (invquantiserRightShift[ch] - 1); } + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++) + { + const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch)); + CompArea area = cu.blocks[ch]; + PelBuf recBuf = cs.getRecoBuf(area); + PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch); + if (compBegin != COMPONENT_Y || ch == 0) + { + escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]))); + assert(escapeValue.at(xPos, yPos) < (1 << 
(channelBitDepth + 1))); + recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch]; + recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked + } + else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0) + { + uint32_t yPosC = yPos >> scaleY; + uint32_t xPosC = xPos >> scaleX; + escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch]))); + assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1))); + recBuf.at(xPosC, yPosC) = (((escapeValue.at(xPosC, yPosC)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch]; + recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth));//to be checked + } + } +} +void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp) +{ + CodingUnit &cu = *cs.getCU(partitioner.chType); + const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA); + const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA); + const int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH); + const int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH); - DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum ); - + uint32_t height = cu.block(compBegin).height; + uint32_t width = cu.block(compBegin).width; - //--- inverse transform --- - if (uiAbsSum > 0) + CPelBuf orgBuf[3]; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) { - m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); + CompArea area = cu.blocks[comp]; + if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) + { + orgBuf[comp] = 
cs.getPredBuf(area); + } + else + { + orgBuf[comp] = cs.getOrgBuf(area); + } } - else + + int errorLimit = g_paletteQuant[cu.qp]; + uint32_t totalSize = height*width; + SortingElement *pelList = new SortingElement[totalSize]; + SortingElement element; + SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1]; + uint32_t dictMaxSize = MAXPLTSIZE; + uint32_t idx = 0; + int last = -1; + + uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc()); + for (uint32_t y = 0; y < height; y++) { - piResi.fill(0); + for (uint32_t x = 0; x < width; x++) + { + uint32_t org[3], pX, pY; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x; + pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y; + org[comp] = orgBuf[comp].at(pX, pY); + } + element.setAll(org, compBegin, numComp); + int besti = last, bestSAD = (last == -1) ? 
MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp); + if (bestSAD) + { + for (int i = idx - 1; i >= 0; i--) + { + uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp); + if (sad < bestSAD) + { + bestSAD = sad; + besti = i; + if (!sad) break; + } + } + } + if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), compBegin, numComp)) + { + pelList[besti].addElement(element, compBegin, numComp); + last = besti; + } + else + { + pelList[idx].copyDataFrom(element, compBegin, numComp); + pelList[idx].setCnt(1); + last = idx; + idx++; + } + } } - //===== reconstruction ===== - if (flag && uiAbsSum > 0 && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() ) + for (int i = 0; i < dictMaxSize; i++) { - piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); + pelListSort[i].setCnt(0); + pelListSort[i].resetAll(compBegin, numComp); + } + + //bubble sorting + dictMaxSize = 1; + for (int i = 0; i < idx; i++) + { + if (pelList[i].getCnt() > pelListSort[dictMaxSize - 1].getCnt()) + { + int j; + for (j = dictMaxSize; j > 0; j--) + { + if (pelList[i].getCnt() > pelListSort[j - 1].getCnt() ) + { + pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp); + dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)MAXPLTSIZE); + } + else + { + break; + } + } + pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp); + } + } + + uint32_t paletteSize = 0; + uint64_t numColorBits = 0; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L; + } + const int plt_lambda_shift = (compBegin > 0) ? 
pcmShiftRight_C : pcmShiftRight_L; + double bitCost = m_pcRdCost->getLambda() / (double) (1 << (2 * plt_lambda_shift)) * numColorBits; + for (int i = 0; i < MAXPLTSIZE; i++) + { + if (pelListSort[i].getCnt()) + { + int half = pelListSort[i].getCnt() >> 1; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + cu.curPLT[comp][paletteSize] = (pelListSort[i].getSumData(comp) + half) / pelListSort[i].getCnt(); + } + + int best = -1; + if (errorLimit) + { + double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + pal[comp] = pelListSort[i].getSumData(comp) / (double)pelListSort[i].getCnt(); + err = pal[comp] - cu.curPLT[comp][paletteSize]; + if (isChroma((ComponentID) comp)) + { + bestCost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)); + } + else + { + bestCost += (err * err) / (1 << (2 * pcmShiftRight_L)); + } + } + bestCost = bestCost * pelListSort[i].getCnt() + bitCost; + + for (int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++) + { + double cost = 0.0; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + err = pal[comp] - cs.prevPLT.curPLT[comp][t]; + if (isChroma((ComponentID) comp)) + { + cost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C)); + } + else + { + cost += (err * err) / (1 << (2 * pcmShiftRight_L)); + } + } + cost *= pelListSort[i].getCnt(); + if (cost < bestCost) + { + best = t; + bestCost = cost; + } + } + if (best != -1) + { + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + cu.curPLT[comp][paletteSize] = cs.prevPLT.curPLT[comp][best]; + } + } + } + + bool duplicate = false; + if (pelListSort[i].getCnt() == 1 && best == -1) + { + duplicate = true; + } + else + { + for (int t = 0; t<paletteSize; t++) + { + bool duplicateTmp = true; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + duplicateTmp = duplicateTmp && (cu.curPLT[comp][paletteSize] == 
cu.curPLT[comp][t]); + } + if (duplicateTmp) + { + duplicate = true; + break; + } + } + } + if (!duplicate) paletteSize++; + } + else + { + break; + } + } + cu.curPLTSize[compBegin] = paletteSize; + + delete[] pelList; + delete[] pelListSort; +} +// ------------------------------------------------------------------------------------------------------------------- +// Intra search +// ------------------------------------------------------------------------------------------------------------------- + +void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx ) +{ + CodingUnit &cu = *cs.getCU( partitioner.chType ); + + if (bLuma) + { + bool isFirst = cu.ispMode ? subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos(); + + // CU header + if( isFirst ) + { + if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag() || cs.slice->getSPS()->getPLTMode()) + && cu.Y().valid() + ) + { + m_CABACEstimator->cu_skip_flag( cu ); + m_CABACEstimator->pred_mode ( cu ); + } + if (CU::isPLT(cu)) + { + return; + } + m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); + if (!CS::isDualITree(cs) && isLuma(partitioner.chType)) + m_CABACEstimator->bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA)); + } + + PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType); + + // luma prediction mode + if (isFirst) + { + if ( !cu.Y().valid()) + m_CABACEstimator->pred_mode( cu ); + m_CABACEstimator->intra_luma_pred_mode( pu ); + } + } + + if (bChroma) + { + bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos(); + + PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA ); + + if( isFirst ) + { + m_CABACEstimator->intra_chroma_pred_mode( pu ); + } + } +} + +void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool 
&bChroma, const int subTuIdx, const PartSplit ispType ) +{ + const UnitArea &currArea = partitioner.currArea(); + int subTuCounter = subTuIdx; + TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter ); + CodingUnit &currCU = *currTU.cu; + uint32_t currDepth = partitioner.currTrDepth; + + const bool subdiv = currTU.depth > currDepth; + ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? COMPONENT_Y : COMPONENT_Cb; + const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv; + + if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) + { + CHECK( !subdiv, "TU split implied" ); + } + else + { + CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" ); + } + + if( bChroma && ( !currCU.ispMode || chromaCbfISP ) ) + { + const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat); + const uint32_t cbfDepth = ( chromaCbfISP ? currDepth - 1 : currDepth ); + + for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++) + { + const ComponentID compID = ComponentID(ch); + + if( currDepth == 0 || TU::getCbfAtDepth( currTU, compID, currDepth - 1 ) || chromaCbfISP ) + { + const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) : false ); + m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, currDepth ), currArea.blocks[compID], cbfDepth, prevCbf ); + + } + } + } + + if (subdiv) + { + + if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) + { + partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); + } + else if( currCU.ispMode && isLuma( compID ) ) + { + partitioner.splitCurrArea( ispType, cs ); + } + else + THROW( "Cannot perform an implicit split!" ); + + do + { + xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType ); + subTuCounter += subTuCounter != -1 ? 
1 : 0; + } while( partitioner.nextPart( cs ) ); + + partitioner.exitCurrSplit(); + } + else + { + //===== Cbfs ===== + if (bLuma) + { + bool previousCbf = false; + bool lastCbfIsInferred = false; + if( ispType != TU_NO_ISP ) + { + bool rootCbfSoFar = false; + uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) : currCU.lwidth() >> floorLog2(currTU.lwidth()); + if( subTuCounter == nTus - 1 ) + { + TransformUnit* tuPointer = currCU.firstTU; + for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) + { + rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth ); + tuPointer = tuPointer->next; + } + if( !rootCbfSoFar ) + { + lastCbfIsInferred = true; + } + } + if( !lastCbfIsInferred ) + { + previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth ); + } + } + if( !lastCbfIsInferred ) + { + m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode ); + } + } + } +} + +void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx ) +{ + const UnitArea &currArea = partitioner.currArea(); + + int subTuCounter = subTuIdx; + TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx ); + uint32_t currDepth = partitioner.currTrDepth; + const bool subdiv = currTU.depth > currDepth; + + if (subdiv) + { + if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) + { + partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); + } + else if( currTU.cu->ispMode ) + { + partitioner.splitCurrArea( ispType, cs ); + } + else + THROW("Implicit TU split not available!"); + + do + { + xEncCoeffQT( cs, partitioner, compID, subTuCounter, ispType, cuCtx ); + subTuCounter += subTuCounter != -1 ? 
1 : 0; + } while( partitioner.nextPart( cs ) ); + + partitioner.exitCurrSplit(); + } + else + + if( currArea.blocks[compID].valid() ) + { + if( compID == COMPONENT_Cr ) + { + const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 1 : 0 ); + m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); + } + if( TU::hasCrossCompPredInfo( currTU, compID ) ) + { + m_CABACEstimator->cross_comp_pred( currTU, compID ); + } + if( TU::getCbf( currTU, compID ) ) + { + if( isLuma(compID) ) + { + m_CABACEstimator->residual_coding( currTU, compID, cuCtx ); + m_CABACEstimator->mts_idx( *currTU.cu, cuCtx ); + } + else + m_CABACEstimator->residual_coding( currTU, compID ); + } + } +} + +uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx ) +{ + m_CABACEstimator->resetBits(); + + xEncIntraHeader( cs, partitioner, bLuma, bChroma, subTuIdx ); + xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuIdx, ispType ); + + + if( bLuma ) + { + xEncCoeffQT( cs, partitioner, COMPONENT_Y, subTuIdx, ispType, cuCtx ); + } + if( bChroma ) + { + xEncCoeffQT( cs, partitioner, COMPONENT_Cb, subTuIdx, ispType ); + xEncCoeffQT( cs, partitioner, COMPONENT_Cr, subTuIdx, ispType ); + } + + CodingUnit& cu = *cs.getCU(partitioner.chType); + if ( cuCtx && bLuma && cu.isSepTree() && ( !cu.ispMode || ( cu.lfnstIdx && subTuIdx == 0 ) || ( !cu.lfnstIdx && subTuIdx == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1 ) ) ) + { + m_CABACEstimator->residual_lfnst_mode(cu, *cuCtx); + } + + uint64_t fracBits = m_CABACEstimator->getEstFracBits(); + return fracBits; +} + +uint64_t IntraSearch::xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID ) +{ + m_CABACEstimator->resetBits(); + + if( compID == COMPONENT_Cb ) + { + //intra mode coding + PredictionUnit &pu = 
*cs.getPU( partitioner.currArea().lumaPos(), partitioner.chType ); + m_CABACEstimator->intra_chroma_pred_mode( pu ); + //xEncIntraHeader(cs, partitioner, false, true); + } + CHECK( partitioner.currTrDepth != 1, "error in the depth!" ); + const UnitArea &currArea = partitioner.currArea(); + + TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType ); + + //cbf coding + const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( currTU, COMPONENT_Cb, partitioner.currTrDepth ) : false ); + m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, partitioner.currTrDepth ), currArea.blocks[compID], partitioner.currTrDepth - 1, prevCbf ); + //coeffs coding and cross comp coding + if( TU::hasCrossCompPredInfo( currTU, compID ) ) + { + m_CABACEstimator->cross_comp_pred( currTU, compID ); + } + if( TU::getCbf( currTU, compID ) ) + { + m_CABACEstimator->residual_coding( currTU, compID ); + } + + uint64_t fracBits = m_CABACEstimator->getEstFracBits(); + return fracBits; +} + +uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const ComponentID &compID) +{ + m_CABACEstimator->resetBits(); + + if( TU::hasCrossCompPredInfo( currTU, compID ) ) + { + m_CABACEstimator->cross_comp_pred( currTU, compID ); + } + + // Include Cbf and jointCbCr flags here as we make decisions across components + CodingStructure &cs = *currTU.cs; + + if ( currTU.jointCbCr ) + { + const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 
1 : 0 ); + m_CABACEstimator->cbf_comp( cs, cbfMask>>1, currTU.blocks[ COMPONENT_Cb ], currTU.depth, false ); + m_CABACEstimator->cbf_comp( cs, cbfMask &1, currTU.blocks[ COMPONENT_Cr ], currTU.depth, cbfMask>>1 ); + if( cbfMask ) + m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); + if( cbfMask >> 1 ) + m_CABACEstimator->residual_coding( currTU, COMPONENT_Cb ); + if( cbfMask & 1 ) + m_CABACEstimator->residual_coding( currTU, COMPONENT_Cr ); + } + else + { + if ( compID == COMPONENT_Cb ) + m_CABACEstimator->cbf_comp( cs, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false ); + else + { + const bool cbCbf = TU::getCbf( currTU, COMPONENT_Cb ); + const bool crCbf = TU::getCbf( currTU, compID ); + const int cbfMask = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 ); + m_CABACEstimator->cbf_comp( cs, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf ); + m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); + } + } + + if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) ) + { + m_CABACEstimator->residual_coding( currTU, compID ); + } + + uint64_t fracBits = m_CABACEstimator->getEstFracBits(); + return fracBits; +} + +void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr) +{ + if (!tu.blocks[compID].valid()) + { + return; + } + + CodingStructure &cs = *tu.cs; + m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc()); + + const CompArea &area = tu.blocks[compID]; + const SPS &sps = *cs.sps; + const PPS &pps = *cs.pps; + + const ChannelType chType = toChannelType(compID); + const int bitDepth = sps.getBitDepth(chType); + + PelBuf piOrg = cs.getOrgBuf (area); + PelBuf piPred = cs.getPredBuf (area); + PelBuf piResi = cs.getResiBuf (area); + PelBuf piOrgResi = cs.getOrgResiBuf(area); + PelBuf piReco = cs.getRecoBuf (area); + + const PredictionUnit &pu = *cs.getPU(area.pos(), chType); 
+ const uint32_t uiChFinalMode = PU::getFinalIntraMode(pu, chType); + + const bool bUseCrossCPrediction = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma( compID ) && PU::isChromaIntraModeCrossCheckMode( pu ) && checkCrossCPrediction; + const bool ccUseRecoResi = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate(); + + + //===== init availability pattern ===== + CHECK( tu.jointCbCr && compID == COMPONENT_Cr, "wrong combination of compID and jointCbCr" ); + bool jointCbCr = tu.jointCbCr && compID == COMPONENT_Cb; + + if (compID == COMPONENT_Y || (isChroma(compID) && tu.cu->bdpcmModeChroma)) + { + PelBuf sharedPredTS( m_pSharedPredTransformSkip[compID], area ); + if( default0Save1Load2 != 2 ) + { + bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu, compID); + bool firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, compID, area); + CompArea areaPredReg(COMPONENT_Y, tu.chromaFormat, area); + if (tu.cu->ispMode && isLuma(compID)) + { + if (predRegDiffFromTB) + { + if (firstTBInPredReg) + { + CU::adjustPredArea(areaPredReg); + initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco); + } + } + else + initIntraPatternChTypeISP(*tu.cu, area, piReco); + } + else + { + initIntraPatternChType(*tu.cu, area); + } + + //===== get prediction signal ===== + if(compID != COMPONENT_Y && !tu.cu->bdpcmModeChroma && PU::isLMCMode(uiChFinalMode)) + { + { + xGetLumaRecPixels( pu, area ); + } + predIntraChromaLM( compID, piPred, pu, area, uiChFinalMode ); + } + else + { + if( PU::isMIP( pu, chType ) ) + { + initIntraMip( pu, area ); + predIntraMip( compID, piPred, pu ); + } + else + { + if (predRegDiffFromTB) + { + if (firstTBInPredReg) + { + PelBuf piPredReg = cs.getPredBuf(areaPredReg); + predIntraAng(compID, piPredReg, pu); + } + } + else + predIntraAng(compID, piPred, pu); + } + } + + + // save prediction + if( default0Save1Load2 == 1 ) + { + sharedPredTS.copyFrom( piPred ); + } + } + else + { + // load prediction + piPred.copyFrom( sharedPredTS 
); + } + } + + + DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), uiChFinalMode ); + //DTRACE_PEL_BUF( D_PRED, piPred, tu, tu.cu->predMode, COMPONENT_Y ); + + const Slice &slice = *cs.slice; + bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())); + if (isLuma(compID)) + { + //===== get residual signal ===== + piResi.copyFrom( piOrg ); + if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); + tmpPred.copyFrom(piPred); + piResi.rspSignal(m_pcReshape->getFwdLUT()); + piResi.subtract(tmpPred); + } + else + piResi.subtract( piPred ); + + if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isLuma(compID)) + { + piOrgResi.copyFrom (piResi); + } + + if (bUseCrossCPrediction) + { + if (xCalcCrossComponentPredictionAlpha(tu, compID, ccUseRecoResi) == 0) + { + return; + } + CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, false); + } + } + + //===== transform and quantization ===== + //--- init rate estimation arrays for RDOQ --- + //--- transform and quantization --- + TCoeff uiAbsSum = 0; + + const QpParam cQP(tu, compID); + +#if RDOQ_CHROMA_LAMBDA + m_pcTrQuant->selectLambda(compID); +#endif + + flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); + if (flag && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() ) + { + int cResScaleInv = tu.getChromaAdj(); + double cResScale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv; + m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale)); + } + + const CompArea &crArea = tu.blocks [ COMPONENT_Cr ]; + PelBuf crOrg = cs.getOrgBuf ( crArea ); + PelBuf crPred = cs.getPredBuf ( crArea 
); + PelBuf crResi = cs.getResiBuf ( crArea ); + PelBuf crReco = cs.getRecoBuf ( crArea ); + + if ( jointCbCr ) + { + // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks + const int absIct = abs( TU::getICTMode(tu) ); + const double lfact = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 ); + m_pcTrQuant->setLambda( lfact * m_pcTrQuant->getLambda() ); + } + if ( sps.getJointCbCrEnabledFlag() && isChroma(compID) && (tu.cu->cs->slice->getSliceQp() > 18) ) + { + m_pcTrQuant->setLambda( 1.3 * m_pcTrQuant->getLambda() ); + } + + if( isLuma(compID) ) + { + if (trModes) + { + m_pcTrQuant->transformNxN(tu, compID, cQP, trModes, m_pcEncCfg->getMTSIntraMaxCand()); + tu.mtsIdx[compID] = trModes->at(0).first; + } +#if JVET_AHG14_LOSSLESS + if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) || tu.cu->bdpcmMode != 0 ) + { + m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); + } +#else + m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); +#endif + + + DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum ); + + if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu)) + { + // ISP has to have at least one non-zero CBF + ruiDist = MAX_INT; + return; + } + +#if JVET_AHG14_LOSSLESS + if( ( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) && 0 == tu.cu->bdpcmMode ) + { + uiAbsSum = 0; + tu.getCoeffs( compID ).fill( 0 ); + TU::setCbfAtDepth( tu, compID, tu.depth, 0 ); + } +#endif + + //--- inverse transform --- + if (uiAbsSum > 0) + { + m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); + } + else + { + piResi.fill(0); + } + } + else // chroma + { + int codedCbfMask = 0; + ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? 
COMPONENT_Cb : COMPONENT_Cr) : compID); + const QpParam qpCbCr(tu, codeCompId); + + if( tu.jointCbCr ) + { + ComponentID otherCompId = ( codeCompId==COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr ); + tu.getCoeffs( otherCompId ).fill(0); // do we need that? + TU::setCbfAtDepth (tu, otherCompId, tu.depth, false ); + } + PelBuf& codeResi = ( codeCompId == COMPONENT_Cr ? crResi : piResi ); + uiAbsSum = 0; + + if (trModes) + { + m_pcTrQuant->transformNxN(tu, compID, qpCbCr, trModes, m_pcEncCfg->getMTSIntraMaxCand()); + tu.mtsIdx[compID] = trModes->at(0).first; + } + // encoder bugfix: Set loadTr to aovid redundant transform process +#if JVET_AHG14_LOSSLESS + if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0) || tu.cu->bdpcmModeChroma != 0) + { + m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); + } +#else + m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); +#endif + +#if JVET_AHG14_LOSSLESS + if ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0) && 0 == tu.cu->bdpcmModeChroma) + { + uiAbsSum = 0; + tu.getCoeffs(compID).fill(0); + TU::setCbfAtDepth(tu, compID, tu.depth, 0); + } +#endif + + DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum ); + if( uiAbsSum > 0 ) + { + m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr); + codedCbfMask += ( codeCompId == COMPONENT_Cb ? 
2 : 1 ); + } + else + { + codeResi.fill(0); + } + + if( tu.jointCbCr ) + { + if( tu.jointCbCr == 3 && codedCbfMask == 2 ) + { + codedCbfMask = 3; + TU::setCbfAtDepth (tu, COMPONENT_Cr, tu.depth, true ); + } + if( tu.jointCbCr != codedCbfMask ) + { + ruiDist = std::numeric_limits<Distortion>::max(); + return; + } + m_pcTrQuant->invTransformICT( tu, piResi, crResi ); + uiAbsSum = codedCbfMask; + } + } + + //===== reconstruction ===== + if ( flag && uiAbsSum > 0 && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() ) + { + piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID)); + if( jointCbCr ) + { + crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr)); + } } if (bUseCrossCPrediction) { - CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, true); + CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, true); + if( jointCbCr ) + { + CrossComponentPrediction::crossComponentPrediction(tu, COMPONENT_Cr, cs.getResiBuf(tu.Y()), crResi, crResi, true); + } + } + + if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0,0), area.size()); + PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); + tmpPred.copyFrom(piPred); + piReco.reconstruct(tmpPred, piResi, cs.slice->clpRng(compID)); + } + else + { + piReco.reconstruct(piPred, piResi, cs.slice->clpRng( compID )); + if( jointCbCr ) + { + crReco.reconstruct(crPred, crResi, cs.slice->clpRng( COMPONENT_Cr )); + } + } + + + //===== update distortion ===== +#if WCG_EXT + if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() + && slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) + { + const CPelBuf orgLuma = cs.getOrgBuf( 
cs.area.blocks[COMPONENT_Y] ); + if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) + { + CompArea tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); + tmpRecLuma.copyFrom(piReco); + tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); + ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + else + { + ruiDist += m_pcRdCost->getDistPart(piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma); + if( jointCbCr ) + { + ruiDist += m_pcRdCost->getDistPart(crOrg, crReco, bitDepth, COMPONENT_Cr, DF_SSE_WTD, &orgLuma); + } + } + } + else +#endif + { + ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE ); + if( jointCbCr ) + { + ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMPONENT_Cr, DF_SSE ); + } + } +} + +void IntraSearch::xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes, const bool loadTr) +{ + if (!tu.blocks[compID].valid()) + { + CHECK(1, "tu does not exist"); + } + + CodingStructure &cs = *tu.cs; + const SPS &sps = *cs.sps; + const Slice &slice = *cs.slice; + const CompArea &area = tu.blocks[compID]; + const CompArea &crArea = tu.blocks[COMPONENT_Cr]; + + PelBuf piOrgResi = cs.getOrgResiBuf(area); + PelBuf piResi = cs.getResiBuf(area); + PelBuf crOrgResi = cs.getOrgResiBuf(crArea); + PelBuf crResi = cs.getResiBuf(crArea); + TCoeff uiAbsSum = 0; + + CHECK(tu.jointCbCr && compID == COMPONENT_Cr, "wrong combination of compID and jointCbCr"); + bool jointCbCr = tu.jointCbCr && compID == COMPONENT_Cb; + + m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc()); + + m_pcTrQuant->lambdaAdjustColorTrans(true); + + if (jointCbCr) + { + ComponentID compIdCode = (tu.jointCbCr >> 1 ? 
COMPONENT_Cb : COMPONENT_Cr); + m_pcTrQuant->selectLambda(compIdCode); + } + else + { + m_pcTrQuant->selectLambda(compID); + } + + bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())) && (tu.blocks[compID].width*tu.blocks[compID].height > 4); + if (flag && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()) + { + int cResScaleInv = tu.getChromaAdj(); + double cResScale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv; + m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale)); + } + + if (jointCbCr) + { + // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks + const int absIct = abs(TU::getICTMode(tu)); + const double lfact = (absIct == 1 || absIct == 3 ? 0.8 : 0.5); + m_pcTrQuant->setLambda(lfact * m_pcTrQuant->getLambda()); + } + if (sps.getJointCbCrEnabledFlag() && isChroma(compID) && (slice.getSliceQp() > 18)) + { + m_pcTrQuant->setLambda(1.3 * m_pcTrQuant->getLambda()); + } + + if (isLuma(compID)) + { + QpParam cQP(tu, compID); + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6; + cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6; + } + + if (trModes) + { + m_pcTrQuant->transformNxN(tu, compID, cQP, trModes, m_pcEncCfg->getMTSIntraMaxCand()); + tu.mtsIdx[compID] = trModes->at(0).first; + } + m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); + + if (uiAbsSum > 0) + { + m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); + } + else + { + piResi.fill(0); + } + } + else + { + int codedCbfMask = 0; + ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? 
COMPONENT_Cb : COMPONENT_Cr) : compID); + QpParam qpCbCr(tu, codeCompId); + for (int qpIdx = 0; qpIdx < 2; qpIdx++) + { + qpCbCr.Qps[qpIdx] = qpCbCr.Qps[qpIdx] + (codeCompId == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); + qpCbCr.pers[qpIdx] = qpCbCr.Qps[qpIdx] / 6; + qpCbCr.rems[qpIdx] = qpCbCr.Qps[qpIdx] % 6; + } + + if (tu.jointCbCr) + { + ComponentID otherCompId = (codeCompId == COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr); + tu.getCoeffs(otherCompId).fill(0); + TU::setCbfAtDepth(tu, otherCompId, tu.depth, false); + } + + PelBuf& codeResi = (codeCompId == COMPONENT_Cr ? crResi : piResi); + uiAbsSum = 0; + m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx()); + if (uiAbsSum > 0) + { + m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr); + codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1); + } + else + { + codeResi.fill(0); + } + + if (tu.jointCbCr) + { + if (tu.jointCbCr == 3 && codedCbfMask == 2) + { + codedCbfMask = 3; + TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true); + } + if (tu.jointCbCr != codedCbfMask) + { + ruiDist = std::numeric_limits<Distortion>::max(); + m_pcTrQuant->lambdaAdjustColorTrans(false); + return; + } + m_pcTrQuant->invTransformICT(tu, piResi, crResi); + uiAbsSum = codedCbfMask; + } + } + + if (flag && uiAbsSum > 0 && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()) + { + piResi.scaleSignal(tu.getChromaAdj(), 0, slice.clpRng(compID)); + if (jointCbCr) + { + crResi.scaleSignal(tu.getChromaAdj(), 0, slice.clpRng(COMPONENT_Cr)); + } + } + + m_pcTrQuant->lambdaAdjustColorTrans(false); + + ruiDist += m_pcRdCost->getDistPart(piOrgResi, piResi, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE); + if (jointCbCr) + { + ruiDist += m_pcRdCost->getDistPart(crOrgResi, crResi, sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE); + } +} + +bool IntraSearch::xIntraCodingLumaISP(CodingStructure& cs, Partitioner& partitioner, const double 
bestCostSoFar) +{ + int subTuCounter = 0; + const CodingUnit& cu = *cs.getCU(partitioner.currArea().lumaPos(), partitioner.chType); + bool earlySkipISP = false; + bool splitCbfLuma = false; + const PartSplit ispType = CU::getISPType(cu, COMPONENT_Y); + + cs.cost = 0; + + partitioner.splitCurrArea(ispType, cs); + + CUCtx cuCtx; + cuCtx.isDQPCoded = true; + cuCtx.isChromaQpAdjCoded = true; + + do // subpartitions loop + { + uint32_t numSig = 0; + Distortion singleDistTmpLuma = 0; + uint64_t singleTmpFracBits = 0; + double singleCostTmp = 0; + + TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType), partitioner.chType); + tu.depth = partitioner.currTrDepth; + + // Encode TU + xIntraCodingTUBlock(tu, COMPONENT_Y, false, singleDistTmpLuma, 0, &numSig); + + if (singleDistTmpLuma == MAX_INT) // all zero CBF skip + { + earlySkipISP = true; + partitioner.exitCurrSplit(); + cs.cost = MAX_DOUBLE; + return false; + } + + { + if (m_pcRdCost->calcRdCost(cs.fracBits, cs.dist + singleDistTmpLuma) > bestCostSoFar) + { + // The accumulated cost + distortion is already larger than the best cost so far, so it is not necessary to calculate the rate + earlySkipISP = true; + } + else + { + singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, false, subTuCounter, ispType, &cuCtx); + } + singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); + } + + cs.cost += singleCostTmp; + cs.dist += singleDistTmpLuma; + cs.fracBits += singleTmpFracBits; + + subTuCounter++; + + splitCbfLuma |= TU::getCbfAtDepth(*cs.getTU(partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1), COMPONENT_Y, partitioner.currTrDepth); + int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1]; + if (subTuCounter < nSubPartitions) + { + // exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance) + if (cs.cost > bestCostSoFar) + { + earlySkipISP = true; + break; 
+ } + else if (subTuCounter < nSubPartitions) + { + // more restrictive exit condition + double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91; + if (subTuCounter < nSubPartitions && cs.cost > bestCostSoFar * threshold) + { + earlySkipISP = true; + break; + } + } + } + } while (partitioner.nextPart(cs)); // subpartitions loop + + partitioner.exitCurrSplit(); + const UnitArea& currArea = partitioner.currArea(); + const uint32_t currDepth = partitioner.currTrDepth; + + if (earlySkipISP) + { + cs.cost = MAX_DOUBLE; + } + else + { + cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist); + // The cost check is necessary here again to avoid superfluous operations if the maximum number of coded subpartitions was reached and yet ISP did not win + if (cs.cost < bestCostSoFar) + { + cs.setDecomp(cu.Y()); + cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y())); + + for (auto& ptu : cs.tus) + { + if (currArea.Y().contains(ptu->Y())) + { + TU::setCbfAtDepth(*ptu, COMPONENT_Y, currDepth, splitCbfLuma ? 
1 : 0); + } + } + } + else + { + earlySkipISP = true; + } + } + return !earlySkipISP; +} + + +bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst ) +{ + int subTuCounter = subTuIdx; + const UnitArea &currArea = partitioner.currArea(); + const CodingUnit &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType ); + bool earlySkipISP = false; + uint32_t currDepth = partitioner.currTrDepth; + const SPS &sps = *cs.sps; + const PPS &pps = *cs.pps; + const bool keepResi = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; + bool bCheckFull = true; + bool bCheckSplit = false; + bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); + bCheckSplit = partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); + + if( cu.ispMode ) + { + bCheckSplit = partitioner.canSplit( ispType, cs ); + bCheckFull = !bCheckSplit; + } + uint32_t numSig = 0; + + double dSingleCost = MAX_DOUBLE; + Distortion uiSingleDistLuma = 0; + uint64_t singleFracBits = 0; + bool checkTransformSkip = sps.getTransformSkipEnabledFlag(); + int bestModeId[ MAX_NUM_COMPONENT ] = { 0, 0, 0 }; + uint8_t nNumTransformCands = cu.mtsFlag ? 
4 : 1; + uint8_t numTransformIndexCands = nNumTransformCands; + + const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); + TempCtx ctxBest ( m_CtxCache ); + + CodingStructure *csSplit = nullptr; + CodingStructure *csFull = nullptr; + + CUCtx cuCtx; + cuCtx.isDQPCoded = true; + cuCtx.isChromaQpAdjCoded = true; + + if( bCheckSplit ) + { + csSplit = &cs; + } + else if( bCheckFull ) + { + csFull = &cs; + } + + bool validReturnFull = false; + + if( bCheckFull ) + { + csFull->cost = 0.0; + + TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType ); + tu.depth = currDepth; + + const bool tsAllowed = TU::isTSAllowed( tu, COMPONENT_Y ); + const bool mtsAllowed = CU::isMTSAllowed( cu, COMPONENT_Y ); + std::vector<TrMode> trModes; + + if( sps.getUseLFNST() ) + { + checkTransformSkip &= tsAllowed; + checkTransformSkip &= !cu.mtsFlag; + checkTransformSkip &= !cu.lfnstIdx; + + if( !cu.mtsFlag && checkTransformSkip ) + { + trModes.push_back( TrMode( 0, true ) ); //DCT2 + trModes.push_back( TrMode( 1, true ) ); //TS + } + } + else + { + nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests + + trModes.push_back( TrMode( 0, true ) ); //DCT2 + if( tsAllowed ) + { + trModes.push_back( TrMode( 1, true ) ); + } + if( mtsAllowed ) + { + for( int i = 2; i < 6; i++ ) + { + trModes.push_back( TrMode( i, true ) ); + } + } + } + + CHECK( !tu.Y().valid(), "Invalid TU" ); + + CodingStructure &saveCS = *m_pSaveCS[0]; + + TransformUnit *tmpTU = nullptr; + + Distortion singleDistTmpLuma = 0; + uint64_t singleTmpFracBits = 0; + double singleCostTmp = 0; + int firstCheckId = ( sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag ) ? mtsFirstCheckId : 0; + + //we add the MTS candidates to the loop. TransformSkip will still be the last one to be checked (when modeId == lastCheckId) as long as checkTransformSkip is true + int lastCheckId = sps.getUseLFNST() ? 
( ( mtsCheckRangeFlag && cu.mtsFlag ) ? ( mtsLastCheckId + ( int ) checkTransformSkip ) : ( numTransformIndexCands - ( firstCheckId + 1 ) + ( int ) checkTransformSkip ) ) : + trModes[ nNumTransformCands - 1 ].first; + bool isNotOnlyOneMode = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1; + + if( isNotOnlyOneMode ) + { + saveCS.pcv = cs.pcv; + saveCS.picture = cs.picture; + saveCS.area.repositionTo(cs.area); + saveCS.clearTUs(); + tmpTU = &saveCS.addTU(currArea, partitioner.chType); + } + + bool cbfBestMode = false; + bool cbfBestModeValid = false; + bool cbfDCT2 = true; + + double bestDCT2cost = MAX_DOUBLE; + double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1; + for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ ) + { + uint8_t transformIndex = modeId; + + if( sps.getUseLFNST() ) + { + if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip + { + // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far + if( m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid ) + { + continue; + } + } + } + else + { +#if JVET_AHG14_LOSSLESS + if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) ) + { +#endif + if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[ COMPONENT_Y ] == MTS_SKIP)) + { + break; + } + if( !trModes[ modeId ].second ) + { + continue; + } + //we compare the DCT-II cost against the best ISP cost so far (except for TS) + if (m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[modeId].first != MTS_DCT2_DCT2 && (trModes[modeId].first != MTS_SKIP || !tsAllowed) && bestDCT2cost > bestCostSoFar * threshold) + { + continue; + } +#if 
JVET_AHG14_LOSSLESS + } +#endif + tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first; + } + + + if ((modeId != firstCheckId) && isNotOnlyOneMode) + { + m_CABACEstimator->getCtx() = ctxStart; + } + + int default0Save1Load2 = 0; + singleDistTmpLuma = 0; + + if( modeId == firstCheckId && ( sps.getUseLFNST() ? ( modeId != lastCheckId ) : ( nNumTransformCands > 1 ) ) ) + { + default0Save1Load2 = 1; + } + else if (modeId != firstCheckId) + { + if( sps.getUseLFNST() && !cbfBestModeValid ) + { + default0Save1Load2 = 1; + } + else + { + default0Save1Load2 = 2; + } + } + if( cu.ispMode ) + { + default0Save1Load2 = 0; + } + if( sps.getUseLFNST() ) + { + if( cu.mtsFlag ) + { + if( moreProbMTSIdxFirst ) + { + const ChannelType chType = toChannelType( COMPONENT_Y ); + const CompArea& area = tu.blocks[ COMPONENT_Y ]; + const PredictionUnit& pu = *cs.getPU( area.pos(), chType ); + uint32_t uiIntraMode = pu.intraDir[ chType ]; + + if( transformIndex == 1 ) + { + tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7; + } + else if( transformIndex == 2 ) + { + tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8; + } + else + { + tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex; + } + } + else + { + tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex; + } + } + else + { + tu.mtsIdx[COMPONENT_Y] = transformIndex; + } + + if( !cu.mtsFlag && checkTransformSkip ) + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true ); + if( modeId == 0 ) + { + for( int i = 0; i < 2; i++ ) + { + if( trModes[ i ].second ) + { + lastCheckId = trModes[ i ].first; + } + } + } + } + else + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); + } + } + else + { + if( nNumTransformCands > 1 ) + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? 
&trModes : nullptr, true ); + if( modeId == 0 ) + { + for( int i = 0; i < nNumTransformCands; i++ ) + { + if( trModes[ i ].second ) + { + lastCheckId = trModes[ i ].first; + } + } + } + } + else + { + xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); + } + } + + //----- determine rate and r-d cost ----- + if( ( sps.getUseLFNST() ? ( modeId == lastCheckId && modeId != 0 && checkTransformSkip ) : ( trModes[ modeId ].first != 0 ) ) && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) + { + //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. + singleCostTmp = MAX_DOUBLE; + } + else + { + if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar ) + { + earlySkipISP = true; + } + else + { + singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType, &cuCtx ); + } + singleCostTmp = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma ); + } + + if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId ) + { + bestDCT2cost = singleCostTmp; + } + + if (singleCostTmp < dSingleCost) + { + dSingleCost = singleCostTmp; + uiSingleDistLuma = singleDistTmpLuma; + singleFracBits = singleTmpFracBits; + + if( sps.getUseLFNST() ) + { + bestModeId[ COMPONENT_Y ] = modeId; + cbfBestMode = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + cbfBestModeValid = true; + validReturnFull = true; + } + else + { + bestModeId[ COMPONENT_Y ] = trModes[ modeId ].first; + if( trModes[ modeId ].first == 0 ) + { + cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + } + } + + if( bestModeId[COMPONENT_Y] != lastCheckId ) + { + saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) ); + saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) ); + + if( keepResi ) + { + saveCS.getResiBuf ( tu.Y() ).copyFrom( csFull->getResiBuf ( tu.Y() ) ); + saveCS.getOrgResiBuf( tu.Y() 
).copyFrom( csFull->getOrgResiBuf( tu.Y() ) ); + } + + tmpTU->copyComponentFrom( tu, COMPONENT_Y ); + + ctxBest = m_CABACEstimator->getCtx(); + } + } + } + + if( sps.getUseLFNST() && !validReturnFull ) + { + csFull->cost = MAX_DOUBLE; + + if( bCheckSplit ) + { + ctxBest = m_CABACEstimator->getCtx(); + } + } + else + { + if( bestModeId[COMPONENT_Y] != lastCheckId ) + { + csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) ); + csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) ); + + if( keepResi ) + { + csFull->getResiBuf ( tu.Y() ).copyFrom( saveCS.getResiBuf ( tu.Y() ) ); + csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) ); + } + + tu.copyComponentFrom( *tmpTU, COMPONENT_Y ); + + if( !bCheckSplit ) + { + m_CABACEstimator->getCtx() = ctxBest; + } + } + else if( bCheckSplit ) + { + ctxBest = m_CABACEstimator->getCtx(); + } + + csFull->cost += dSingleCost; + csFull->dist += uiSingleDistLuma; + csFull->fracBits += singleFracBits; + } + } + + bool validReturnSplit = false; + if( bCheckSplit ) + { + //----- store full entropy coding status, load original entropy coding status ----- + if( bCheckFull ) + { + m_CABACEstimator->getCtx() = ctxStart; + } + //----- code splitted block ----- + csSplit->cost = 0; + + bool uiSplitCbfLuma = false; + bool splitIsSelected = true; + if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) + { + partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); + } + + if( cu.ispMode ) + { + partitioner.splitCurrArea( ispType, *csSplit ); + } + do + { + bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType, false, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId ); + subTuCounter += subTuCounter != -1 ? 
1 : 0; + if( sps.getUseLFNST() && !tmpValidReturnSplit ) + { + splitIsSelected = false; + break; + } + + if( !cu.ispMode ) + { + csSplit->setDecomp( partitioner.currArea().Y() ); + } + else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) ) + { + csSplit->setDecomp( cu.Y() ); + } + + uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth ); + if( cu.ispMode ) + { + //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance) + if( csSplit->cost > bestCostSoFar ) + { + earlySkipISP = true; + splitIsSelected = false; + break; + } + else + { + //more restrictive exit condition + bool tuIsDividedInRows = CU::divideTuInRows( cu ); + int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth()); + double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91; + if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold ) + { + earlySkipISP = true; + splitIsSelected = false; + break; + } + } + } + + + + } while( partitioner.nextPart( *csSplit ) ); + + partitioner.exitCurrSplit(); + + if( splitIsSelected ) + { + for( auto &ptu : csSplit->tus ) + { + if( currArea.Y().contains( ptu->Y() ) ) + { + TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 ); + } + } + + //----- restore context states ----- + m_CABACEstimator->getCtx() = ctxStart; + + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false; + cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false; + cuCtx.lfnstLastScanPos = false; + cuCtx.violatesMtsCoeffConstraint = false; + + //----- determine rate and r-d cost ----- + csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 
0 : -1, ispType, &cuCtx ); + + //--- update cost --- + csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist); + + validReturnSplit = true; + } + } + + bool retVal = false; + if( csFull || csSplit ) + { + if( !sps.getUseLFNST() || validReturnFull || validReturnSplit ) + { + { + // otherwise this would've happened in useSubStructure + cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) ); + cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) ); + } + + if( cu.ispMode && earlySkipISP ) + { + cs.cost = MAX_DOUBLE; + } + else + { + cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); + retVal = true; + } + } + } + return retVal; +} + +bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &partitioner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst) +{ + const UnitArea &currArea = partitioner.currArea(); + uint32_t currDepth = partitioner.currTrDepth; + const Slice &slice = *cs.slice; + const SPS &sps = *cs.sps; + + bool bCheckFull = !partitioner.canSplit(TU_MAX_TR_SPLIT, cs); + bool bCheckSplit = !bCheckFull; + + TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx()); + TempCtx ctxBest(m_CtxCache); + + CodingStructure *csSplit = nullptr; + CodingStructure *csFull = nullptr; + if (bCheckSplit) + { + csSplit = &cs; + } + else if (bCheckFull) + { + csFull = &cs; + } + + bool validReturnFull = false; + + if (bCheckFull) + { + TransformUnit &tu = csFull->addTU(CS::getArea(*csFull, currArea, partitioner.chType), partitioner.chType); + tu.depth = currDepth; + const CodingUnit &cu = *csFull->getCU(tu.Y().pos(), CHANNEL_TYPE_LUMA); + const PredictionUnit &pu = *csFull->getPU(tu.Y().pos(), CHANNEL_TYPE_LUMA); + CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU"); + CHECK(tu.cu != &cu, "wrong CU fetch"); + CHECK(cu.ispMode, "adaptive color transform cannot be applied to ISP"); + 
CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform"); + + // 1. intra prediction and forward color transform + + PelUnitBuf orgBuf = csFull->getOrgBuf(tu); + PelUnitBuf predBuf = csFull->getPredBuf(tu); + PelUnitBuf resiBuf = csFull->getResiBuf(tu); + PelUnitBuf orgResiBuf = csFull->getOrgResiBuf(tu); + + for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++) + { + ComponentID compID = (ComponentID)i; + const CompArea &area = tu.blocks[compID]; + const ChannelType chType = toChannelType(compID); + + PelBuf piOrg = orgBuf.bufs[compID]; + PelBuf piPred = predBuf.bufs[compID]; + PelBuf piResi = resiBuf.bufs[compID]; + + initIntraPatternChType(*tu.cu, area); + if (PU::isMIP(pu, chType)) + { + initIntraMip(pu, area); + predIntraMip(compID, piPred, pu); + } + else + { + predIntraAng(compID, piPred, pu); + } + + piResi.copyFrom(piOrg); + if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); + tmpPred.copyFrom(piPred); + piResi.rspSignal(m_pcReshape->getFwdLUT()); + piResi.subtract(tmpPred); + } + else + piResi.subtract(piPred); + } + + resiBuf.colorSpaceConvert(orgResiBuf, true); + + // 2. luma residual optimization + double dSingleCostLuma = MAX_DOUBLE; + bool checkTransformSkip = sps.getTransformSkipEnabledFlag(); + int bestLumaModeId = 0; + uint8_t nNumTransformCands = cu.mtsFlag ? 
4 : 1; + uint8_t numTransformIndexCands = nNumTransformCands; + + const bool tsAllowed = TU::isTSAllowed(tu, COMPONENT_Y); + const bool mtsAllowed = CU::isMTSAllowed(cu, COMPONENT_Y); + std::vector<TrMode> trModes; + + if (sps.getUseLFNST()) + { + checkTransformSkip &= tsAllowed; + checkTransformSkip &= !cu.mtsFlag; + checkTransformSkip &= !cu.lfnstIdx; + + if (!cu.mtsFlag && checkTransformSkip) + { + trModes.push_back(TrMode(0, true)); //DCT2 + trModes.push_back(TrMode(1, true)); //TS + } + } + else + { + nNumTransformCands = 1 + (tsAllowed ? 1 : 0) + (mtsAllowed ? 4 : 0); // DCT + TS + 4 MTS = 6 tests + + trModes.push_back(TrMode(0, true)); //DCT2 + if (tsAllowed) + { + trModes.push_back(TrMode(1, true)); + } + if (mtsAllowed) + { + for (int i = 2; i < 6; i++) + { + trModes.push_back(TrMode(i, true)); + } + } + } + + CodingStructure &saveLumaCS = *m_pSaveCS[0]; + TransformUnit *tmpTU = nullptr; + Distortion singleDistTmpLuma = 0; + uint64_t singleTmpFracBits = 0; + double singleCostTmp = 0; + int firstCheckId = (sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag) ? mtsFirstCheckId : 0; + int lastCheckId = sps.getUseLFNST() ? ((mtsCheckRangeFlag && cu.mtsFlag) ? (mtsLastCheckId + (int)checkTransformSkip) : (numTransformIndexCands - (firstCheckId + 1) + (int)checkTransformSkip)) : trModes[nNumTransformCands - 1].first; + bool isNotOnlyOneMode = sps.getUseLFNST() ? 
lastCheckId != firstCheckId : nNumTransformCands != 1; + + if (isNotOnlyOneMode) + { + saveLumaCS.pcv = csFull->pcv; + saveLumaCS.picture = csFull->picture; + saveLumaCS.area.repositionTo(csFull->area); + saveLumaCS.clearTUs(); + tmpTU = &saveLumaCS.addTU(currArea, partitioner.chType); + } + + bool cbfBestMode = false; + bool cbfBestModeValid = false; + bool cbfDCT2 = true; + + m_pcRdCost->lambdaAdjustColorTrans(true, COMPONENT_Y); + + for (int modeId = firstCheckId; modeId <= lastCheckId; modeId++) + { + uint8_t transformIndex = modeId; + csFull->getResiBuf(tu.Y()).copyFrom(csFull->getOrgResiBuf(tu.Y())); + + m_CABACEstimator->getCtx() = ctxStart; + m_CABACEstimator->resetBits(); + + if (sps.getUseLFNST()) + { + if ((transformIndex < lastCheckId) || ((transformIndex == lastCheckId) && !checkTransformSkip)) //we avoid this if the mode is transformSkip + { + // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far + if (m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid) + { + continue; + } + } + } + else + { +#if JVET_AHG14_LOSSLESS + if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING)) + { +#endif + if (!cbfDCT2 || (m_pcEncCfg->getUseTransformSkipFast() && bestLumaModeId == 1)) + { + break; + } + if (!trModes[modeId].second) + { + continue; + } +#if JVET_AHG14_LOSSLESS + } +#endif + tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first; + } + + singleDistTmpLuma = 0; + if (sps.getUseLFNST()) + { + if (cu.mtsFlag) + { + if (moreProbMTSIdxFirst) + { + uint32_t uiIntraMode = pu.intraDir[CHANNEL_TYPE_LUMA]; + + if (transformIndex == 1) + { + tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7; + } + else if (transformIndex == 2) + { + tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? 
MTS_DCT8_DST7 : MTS_DST7_DCT8; + } + else + { + tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex; + } + } + else + { + tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex; + } + } + else + { + tu.mtsIdx[COMPONENT_Y] = transformIndex; + } + + if (!cu.mtsFlag && checkTransformSkip) + { + xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true); + if (modeId == 0) + { + for (int i = 0; i < 2; i++) + { + if (trModes[i].second) + { + lastCheckId = trModes[i].first; + } + } + } + } + else + { + xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma); + } + } + else + { + if (nNumTransformCands > 1) + { + xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true); + if (modeId == 0) + { + for (int i = 0; i < nNumTransformCands; i++) + { + if (trModes[i].second) + { + lastCheckId = trModes[i].first; + } + } + } + } + else + { + xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma); + } + } + + //----- determine rate and r-d cost ----- + if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth)) + { + //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. 
+ singleCostTmp = MAX_DOUBLE; + } + else + { + singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP); + singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); + } + + if (singleCostTmp < dSingleCostLuma) + { + dSingleCostLuma = singleCostTmp; + validReturnFull = true; + + if (sps.getUseLFNST()) + { + bestLumaModeId = modeId; + cbfBestMode = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth); + cbfBestModeValid = true; + } + else + { + bestLumaModeId = trModes[modeId].first; + if (trModes[modeId].first == 0) + { + cbfDCT2 = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth); + } + } + + if (bestLumaModeId != lastCheckId) + { + saveLumaCS.getResiBuf(tu.Y()).copyFrom(csFull->getResiBuf(tu.Y())); + tmpTU->copyComponentFrom(tu, COMPONENT_Y); + ctxBest = m_CABACEstimator->getCtx(); + } + } + } + + m_pcRdCost->lambdaAdjustColorTrans(false, COMPONENT_Y); + + if (sps.getUseLFNST()) + { + if (!validReturnFull) + { + csFull->cost = MAX_DOUBLE; + return false; + } + } + else + { + CHECK(!validReturnFull, "no transform mode was tested for luma"); + } + + csFull->setDecomp(currArea.Y(), true); + csFull->setDecomp(currArea.Cb(), true); + + if (bestLumaModeId != lastCheckId) + { + csFull->getResiBuf(tu.Y()).copyFrom(saveLumaCS.getResiBuf(tu.Y())); + tu.copyComponentFrom(*tmpTU, COMPONENT_Y); + m_CABACEstimator->getCtx() = ctxBest; + } + + // 3 chroma residual optimization + CodingStructure &saveChromaCS = *m_pSaveCS[1]; + saveChromaCS.pcv = csFull->pcv; + saveChromaCS.picture = csFull->picture; + saveChromaCS.area.repositionTo(csFull->area); + saveChromaCS.initStructData(MAX_INT, true); + tmpTU = &saveChromaCS.addTU(currArea, partitioner.chType); + + CompArea& cbArea = tu.blocks[COMPONENT_Cb]; + CompArea& crArea = tu.blocks[COMPONENT_Cr]; + + ctxStart = m_CABACEstimator->getCtx(); + m_CABACEstimator->resetBits(); + tu.jointCbCr = 0; + + bool doReshaping = (slice.getPicHeader()->getLmcsEnabledFlag() && 
slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (slice.isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4)); + if (doReshaping) + { + const Area area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size())); + const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area); + int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY); + tu.setChromaAdj(adj); + } + + CompStorage orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp + orgResiCb[0].create(cbArea); + orgResiCr[0].create(crArea); + orgResiCb[0].copyFrom(csFull->getOrgResiBuf(cbArea)); + orgResiCr[0].copyFrom(csFull->getOrgResiBuf(crArea)); + if (doReshaping) + { + int cResScaleInv = tu.getChromaAdj(); + orgResiCb[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cb)); + orgResiCr[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cr)); + } + + // 3.1 regular chroma residual coding + csFull->getResiBuf(cbArea).copyFrom(orgResiCb[0]); + csFull->getResiBuf(crArea).copyFrom(orgResiCr[0]); + + for (uint32_t c = COMPONENT_Cb; c < ::getNumberValidTBlocks(*csFull->pcv); c++) + { + const ComponentID compID = ComponentID(c); + Distortion singleDistChroma = 0; + xIntraCodingACTTUBlock(tu, compID, singleDistChroma); + xGetIntraFracBitsQTChroma(tu, compID); + } + + Position tuPos = tu.Y(); + tuPos.relativeTo(cu.Y()); + const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size())); + PelUnitBuf invColorTransResidual = m_colorTransResiBuf.getBuf(relativeUnitArea); + csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false); + + Distortion totalDist = 0; + for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++) + { + const ComponentID compID = ComponentID(c); + const CompArea& area = tu.blocks[compID]; + PelBuf piOrg = 
csFull->getOrgBuf(area); + PelBuf piReco = csFull->getRecoBuf(area); + PelBuf piPred = csFull->getPredBuf(area); + PelBuf piResi = invColorTransResidual.bufs[compID]; + + piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID)); + + if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() + & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) + { + const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]); + if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) + { + CompArea tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); + tmpRecLuma.copyFrom(piReco); + tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); + totalDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + else + { + totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + } + else + { + totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE); + } + } + + m_CABACEstimator->getCtx() = ctxStart; + uint64_t totalBits = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP); + double totalCost = m_pcRdCost->calcRdCost(totalBits, totalDist); + + saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea)); + saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea)); + saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu)); + tmpTU->copyComponentFrom(tu, COMPONENT_Cb); + tmpTU->copyComponentFrom(tu, COMPONENT_Cr); + ctxBest = m_CABACEstimator->getCtx(); + + // 3.2 jointCbCr + double bestCostJointCbCr = totalCost; + Distortion bestDistJointCbCr = totalDist; + uint64_t bestBitsJointCbCr = totalBits; + int bestJointCbCr = tu.jointCbCr; 
assert(!bestJointCbCr); + + bool lastIsBest = false; + std::vector<int> jointCbfMasksToTest; + if (sps.getJointCbCrEnabledFlag() && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr))) + { + jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr); + } + + for (int cbfMask : jointCbfMasksToTest) + { + m_CABACEstimator->getCtx() = ctxStart; + m_CABACEstimator->resetBits(); + + Distortion distTmp = 0; + tu.jointCbCr = (uint8_t)cbfMask; + + csFull->getResiBuf(cbArea).copyFrom(orgResiCb[cbfMask]); + csFull->getResiBuf(crArea).copyFrom(orgResiCr[cbfMask]); + xIntraCodingACTTUBlock(tu, COMPONENT_Cb, distTmp); + + double costTmp = std::numeric_limits<double>::max(); + uint64_t bitsTmp = 0; + if (distTmp < std::numeric_limits<Distortion>::max()) + { + csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false); + distTmp = 0; + for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++) + { + const ComponentID compID = ComponentID(c); + const CompArea& area = tu.blocks[compID]; + PelBuf piOrg = csFull->getOrgBuf(area); + PelBuf piReco = csFull->getRecoBuf(area); + PelBuf piPred = csFull->getPredBuf(area); + PelBuf piResi = invColorTransResidual.bufs[compID]; + + piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID)); + if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() + & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) + { + const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]); + if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) + { + CompArea tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); + tmpRecLuma.copyFrom(piReco); + tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); + distTmp += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, 
sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + else + { + distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + } + } + else + { + distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE); + } + } + + bitsTmp = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP); + costTmp = m_pcRdCost->calcRdCost(bitsTmp, distTmp); + } + + if (costTmp < bestCostJointCbCr) + { + bestCostJointCbCr = costTmp; + bestDistJointCbCr = distTmp; + bestBitsJointCbCr = bitsTmp; + bestJointCbCr = tu.jointCbCr; + lastIsBest = (cbfMask == jointCbfMasksToTest.back()); + + // store data + if (!lastIsBest) + { + saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea)); + saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea)); + saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu)); + tmpTU->copyComponentFrom(tu, COMPONENT_Cb); + tmpTU->copyComponentFrom(tu, COMPONENT_Cr); + + ctxBest = m_CABACEstimator->getCtx(); + } + } + } + + if (!lastIsBest) + { + csFull->getResiBuf(cbArea).copyFrom(saveChromaCS.getResiBuf(cbArea)); + csFull->getResiBuf(crArea).copyFrom(saveChromaCS.getResiBuf(crArea)); + csFull->getRecoBuf(tu).copyFrom(saveChromaCS.getRecoBuf(tu)); + tu.copyComponentFrom(*tmpTU, COMPONENT_Cb); + tu.copyComponentFrom(*tmpTU, COMPONENT_Cr); + + m_CABACEstimator->getCtx() = ctxBest; + } + tu.jointCbCr = bestJointCbCr; + csFull->picture->getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu)); + + csFull->dist += bestDistJointCbCr; + csFull->fracBits += bestBitsJointCbCr; + csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist); } - if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y) - { - CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0,0), area.size()); - PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea); - tmpPred.copyFrom(piPred); 
- piReco.reconstruct(tmpPred, piResi, cs.slice->clpRng(compID)); + bool validReturnSplit = false; + if (bCheckSplit) + { + if (partitioner.canSplit(TU_MAX_TR_SPLIT, *csSplit)) + { + partitioner.splitCurrArea(TU_MAX_TR_SPLIT, *csSplit); + } + + bool splitIsSelected = true; + do + { + bool tmpValidReturnSplit = xRecurIntraCodingACTQT(*csSplit, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst); + if (sps.getUseLFNST()) + { + if (!tmpValidReturnSplit) + { + splitIsSelected = false; + break; + } + } + else + { + CHECK(!tmpValidReturnSplit, "invalid RD of sub-TU partitions for ACT"); + } + } while (partitioner.nextPart(*csSplit)); + + partitioner.exitCurrSplit(); + + if (splitIsSelected) + { + unsigned compCbf[3] = { 0, 0, 0 }; + for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType)) + { + for (unsigned ch = 0; ch < getNumberValidTBlocks(*csSplit->pcv); ch++) + { + compCbf[ch] |= (TU::getCbfAtDepth(currTU, ComponentID(ch), currDepth + 1) ? 1 : 0); + } + } + + for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType)) + { + TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]); + TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]); + TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]); + } + + m_CABACEstimator->getCtx() = ctxStart; + csSplit->fracBits = xGetIntraFracBitsQT(*csSplit, partitioner, true, true, -1, TU_NO_ISP); + csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist); + + validReturnSplit = true; + } } - else - piReco.reconstruct(piPred, piResi, cs.slice->clpRng( compID )); - //===== update distortion ===== -#if WCG_EXT - if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getReshaper() - && slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD())))) + bool retVal = false; + if (csFull || csSplit) { - const CPelBuf 
orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] ); - if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())) + if (sps.getUseLFNST()) { - CompArea tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); - PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1); - tmpRecLuma.copyFrom(piReco); - tmpRecLuma.rspSignal(m_pcReshape->getInvLUT()); - ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma); + if (validReturnFull || validReturnSplit) + { + retVal = true; + } } else - ruiDist += m_pcRdCost->getDistPart(piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma); - } - else -#endif - { - ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE ); + { + CHECK(!validReturnFull && !validReturnSplit, "illegal TU optimization"); + retVal = true; + } } + return retVal; } -void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinnder ) +ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType ) { - int subTuCounter = subTuIdx; - const UnitArea &currArea = partitioner.currArea(); - const CodingUnit &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType ); - bool earlySkipISP = false; - uint32_t currDepth = partitioner.currTrDepth; - const PPS &pps = *cs.pps; - const bool keepResi = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS; - bool bCheckFull = true; - bool bCheckSplit = false; - bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); - bCheckSplit = partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); - - if( cu.ispMode ) - { - bCheckSplit = partitioner.canSplit( ispType, cs ); - bCheckFull = !bCheckSplit; - } - uint32_t numSig = 0; + UnitArea currArea = 
partitioner.currArea(); + const bool keepResi = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS; + if( !currArea.Cb().valid() ) return ChromaCbfs( false ); - double dSingleCost = MAX_DOUBLE; - Distortion uiSingleDistLuma = 0; - uint64_t singleFracBits = 0; - int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0 }; - const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); - TempCtx ctxBest ( m_CtxCache ); + TransformUnit &currTU = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); + const PredictionUnit &pu = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); - CodingStructure *csSplit = nullptr; - CodingStructure *csFull = nullptr; + bool lumaUsesISP = false; + uint32_t currDepth = partitioner.currTrDepth; + const PPS &pps = *cs.pps; + ChromaCbfs cbfs ( false ); - if( bCheckSplit ) - { - csSplit = &cs; - } - else if( bCheckFull ) + if (currDepth == currTU.depth) { - csFull = &cs; - } + if (!currArea.Cb().valid() || !currArea.Cr().valid()) + { + return cbfs; + } - if( bCheckFull ) - { - csFull->cost = 0.0; - TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType ); - tu.depth = currDepth; + CodingStructure &saveCS = *m_pSaveCS[1]; + saveCS.pcv = cs.pcv; + saveCS.picture = cs.picture; + saveCS.area.repositionTo( cs.area ); + saveCS.initStructData( MAX_INT, true ); - const bool tsAllowed = TU::isTSAllowed ( tu, COMPONENT_Y ); - const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y ); - uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 
4 : 0 ); // DCT + TS + 4 MTS = 6 tests - std::vector<TrMode> trModes; - trModes.push_back( TrMode( 0, true ) ); //DCT2 - if( tsAllowed ) - { - trModes.push_back( TrMode( 1, true ) ); - } - if( mtsAllowed ) + if( !currTU.cu->isSepTree() && currTU.cu->ispMode ) { - for( int i = 2; i < 6; i++ ) - { - trModes.push_back( TrMode( i, true) ); - } + saveCS.clearCUs(); + CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType ); + auxCU.ispMode = currTU.cu->ispMode; + saveCS.sps = currTU.cs->sps; + saveCS.clearPUs(); + saveCS.addPU( *currTU.cu->firstPU, partitioner.chType ); } - CHECK( !tu.Y().valid(), "Invalid TU" ); + TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType); - CodingStructure &saveCS = *m_pSaveCS[0]; - TransformUnit *tmpTU = nullptr; + cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video) - Distortion singleDistTmpLuma = 0; - uint64_t singleTmpFracBits = 0; - double singleCostTmp = 0; - int firstCheckId = 0; + const unsigned numTBlocks = ::getNumberValidTBlocks( *cs.pcv ); - int lastCheckId = trModes[nNumTransformCands-1].first; - bool isNotOnlyOneMode = nNumTransformCands != 1; + CompArea& cbArea = currTU.blocks[COMPONENT_Cb]; + CompArea& crArea = currTU.blocks[COMPONENT_Cr]; + double bestCostCb = MAX_DOUBLE; + double bestCostCr = MAX_DOUBLE; + Distortion bestDistCb = 0; + Distortion bestDistCr = 0; + int maxModesTested = 0; + bool earlyExitISP = false; - if( isNotOnlyOneMode ) + TempCtx ctxStartTU( m_CtxCache ); + TempCtx ctxStart ( m_CtxCache ); + TempCtx ctxBest ( m_CtxCache ); + + ctxStartTU = m_CABACEstimator->getCtx(); + currTU.jointCbCr = 0; + + // Do predictions here to avoid repeating the "default0Save1Load2" stuff + int predMode = pu.cu->bdpcmModeChroma ? 
BDPCM_IDX : PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA); + + PelBuf piPredCb = cs.getPredBuf(cbArea); + PelBuf piPredCr = cs.getPredBuf(crArea); + + initIntraPatternChType( *currTU.cu, cbArea); + initIntraPatternChType( *currTU.cu, crArea); + + if( PU::isLMCMode( predMode ) ) { - saveCS.pcv = cs.pcv; - saveCS.picture = cs.picture; - saveCS.area.repositionTo(cs.area); - saveCS.clearTUs(); - tmpTU = &saveCS.addTU(currArea, partitioner.chType); + xGetLumaRecPixels( pu, cbArea ); + predIntraChromaLM( COMPONENT_Cb, piPredCb, pu, cbArea, predMode ); + predIntraChromaLM( COMPONENT_Cr, piPredCr, pu, crArea, predMode ); + } + else + { + predIntraAng( COMPONENT_Cb, piPredCb, pu); + predIntraAng( COMPONENT_Cr, piPredCr, pu); } - bool cbfDCT2 = true; + // determination of chroma residuals including reshaping and cross-component prediction + //----- get chroma residuals ----- + PelBuf resiCb = cs.getResiBuf(cbArea); + PelBuf resiCr = cs.getResiBuf(crArea); + resiCb.copyFrom( cs.getOrgBuf (cbArea) ); + resiCr.copyFrom( cs.getOrgBuf (crArea) ); + resiCb.subtract( piPredCb ); + resiCr.subtract( piPredCr ); + + //----- get reshape parameter ---- + bool doReshaping = ( cs.picHeader->getLmcsEnabledFlag() && cs.picHeader->getLmcsChromaResidualScaleFlag() + && (cs.slice->isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4) ); + if( doReshaping ) + { + const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].size())); + const CompArea &areaY = CompArea(COMPONENT_Y, currTU.chromaFormat, area); + int adj = m_pcReshape->calculateChromaAdjVpduNei(currTU, areaY); + currTU.setChromaAdj(adj); + } - double bestDCT2cost = MAX_DOUBLE; - double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && nNumTransformCands > 1 ? 
1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1; - for( int modeId = firstCheckId; modeId < nNumTransformCands; modeId++ ) + //----- get cross component prediction parameters ----- + bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y ); + int compAlpha[MAX_NUM_COMPONENT] = { 0, 0, 0 }; + if( checkCrossComponentPrediction ) { - if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[COMPONENT_Y] == 1 ) ) + compAlpha[COMPONENT_Cb] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cb, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() ); + compAlpha[COMPONENT_Cr] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cr, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() ); + if( compAlpha[COMPONENT_Cb] == 0 && compAlpha[COMPONENT_Cr] == 0 ) { - break; + checkCrossComponentPrediction = false; } - if( !trModes[modeId].second ) - { - continue; + } + + //===== store original residual signals (std and crossCompPred) ===== + CompStorage orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp + for( int k = 0; k < (checkCrossComponentPrediction?5:1); k+=4 ) + { + orgResiCb[k].create( cbArea ); + orgResiCr[k].create( crArea ); + if( k >= 4 ) { + CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cb, cs.getResiBuf(currTU.Y()), resiCb, orgResiCb[k], false); + CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cr, cs.getResiBuf(currTU.Y()), resiCr, orgResiCr[k], false); + } else { + orgResiCb[k].copyFrom( resiCb ); + orgResiCr[k].copyFrom( resiCr ); } - //we compare the DCT-II cost against the best ISP cost so far (except for TS) - if ( m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && trModes[modeId].first != 0 && ( trModes[modeId].first != 1 || !tsAllowed ) && bestDCT2cost > bestCostSoFar * threshold ) + if( doReshaping ) 
{ - continue; + int cResScaleInv = currTU.getChromaAdj(); + orgResiCb[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cb) ); + orgResiCr[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cr) ); } - tu.mtsIdx = trModes[modeId].first; + } - if ((modeId != firstCheckId) && isNotOnlyOneMode) + for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++) + { + const ComponentID compID = ComponentID(c); + const CompArea& area = currTU.blocks[compID]; + + double dSingleCost = MAX_DOUBLE; + int bestModeId = 0; + Distortion singleDistCTmp = 0; + double singleCostTmp = 0; + const int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1; + const bool tsAllowed = TU::isTSAllowed(currTU, compID) && (m_pcEncCfg->getUseChromaTS()); + uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests + std::vector<TrMode> trModes; + trModes.push_back(TrMode(0, true)); // DCT2 + + if (tsAllowed) { - m_CABACEstimator->getCtx() = ctxStart; + trModes.push_back(TrMode(1, true));//TS } + CHECK(!currTU.Cb().valid(), "Invalid TU"); + + const int totalModesToTest = crossCPredictionModesToTest * nNumTransformCands; + bool cbfDCT2 = true; + const bool isOneMode = false; + maxModesTested = totalModesToTest > maxModesTested ? totalModesToTest : maxModesTested; + int currModeId = 0; int default0Save1Load2 = 0; - singleDistTmpLuma = 0; - if( modeId == firstCheckId && nNumTransformCands > 1 ) - { - default0Save1Load2 = 1; - } - else if (modeId != firstCheckId) - { - default0Save1Load2 = 2; - } - if( cu.ispMode ) + + if (!isOneMode) { - default0Save1Load2 = 0; + ctxStart = m_CABACEstimator->getCtx(); } - if( nNumTransformCands > 1 ) + + for (int modeId = 0; modeId < nNumTransformCands; modeId++) { - xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? 
&trModes : nullptr, true ); - if( modeId == 0 ) + for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++) { - for( int i = 0; i < nNumTransformCands; i++ ) + resiCb.copyFrom( orgResiCb[4*crossCPredictionModeId] ); + resiCr.copyFrom( orgResiCr[4*crossCPredictionModeId] ); + + currTU.compAlpha [compID] = ( crossCPredictionModeId ? compAlpha[compID] : 0 ); + + currTU.mtsIdx[compID] = currTU.cu->bdpcmModeChroma ? MTS_SKIP : trModes[modeId].first; + + currModeId++; + + const bool isFirstMode = (currModeId == 1); + const bool isLastMode = false; // Always store output to saveCS and tmpTU + +#if JVET_AHG14_LOSSLESS + if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) ) + { +#endif + //if DCT2's cbf==0, skip ts search + if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP) + { + break; + } + if (!trModes[modeId].second) + { + continue; + } +#if JVET_AHG14_LOSSLESS + } +#endif + + if (!isFirstMode) // if not first mode to be tested + { + m_CABACEstimator->getCtx() = ctxStart; + } + + singleDistCTmp = 0; + + if (nNumTransformCands > 1) + { + xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2, nullptr, modeId == 0 ? &trModes : nullptr, true); + } + else + { + xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2); + } + + if (((crossCPredictionModeId == 1) && (currTU.compAlpha[compID] == 0)) || ((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmModeChroma) && !TU::getCbf(currTU, compID))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. 
+ { + singleCostTmp = MAX_DOUBLE; + } + else if( lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb ) + { + uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent( cs, partitioner, ComponentID( c ) ); + singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp ); + if( isOneMode || ( !isOneMode && !isLastMode ) ) + { + m_CABACEstimator->getCtx() = ctxStart; + } + } + else if( !isOneMode ) + { + uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma( currTU, compID ); + singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp ); + } + + if( singleCostTmp < dSingleCost ) { - if( trModes[i].second ) + dSingleCost = singleCostTmp; + bestModeId = currModeId; + + if ( c == COMPONENT_Cb ) + { + bestCostCb = singleCostTmp; + bestDistCb = singleDistCTmp; + } + else + { + bestCostCr = singleCostTmp; + bestDistCr = singleDistCTmp; + } + + if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2) + { + cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth); + } + + if( !isLastMode ) { - lastCheckId = trModes[i].first; +#if KEEP_PRED_AND_RESI_SIGNALS + saveCS.getPredBuf (area).copyFrom(cs.getPredBuf (area)); + saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area)); +#endif + saveCS.getPredBuf (area).copyFrom(cs.getPredBuf (area)); + if( keepResi ) + { + saveCS.getResiBuf (area).copyFrom(cs.getResiBuf (area)); + } + saveCS.getRecoBuf (area).copyFrom(cs.getRecoBuf (area)); + + tmpTU.copyComponentFrom(currTU, compID); + + ctxBest = m_CABACEstimator->getCtx(); } } } } - else - { - xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig ); - } - //----- determine rate and r-d cost ----- - if( ( trModes[modeId].first != 0 && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) ) - { - //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. 
- singleCostTmp = MAX_DOUBLE; - } - else + if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb ) { - if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar ) - { - earlySkipISP = true; - } - else - { - singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType ); - } - singleCostTmp = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma ); + //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr + cs.dist = MAX_UINT; + m_CABACEstimator->getCtx() = ctxStart; + earlyExitISP = true; + break; + //return cbfs; } - if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId ) + // Done with one component of separate coding of Cr and Cb, just switch to the best Cb contexts if Cr coding is still to be done + if ( c == COMPONENT_Cb && bestModeId < totalModesToTest) { - bestDCT2cost = singleCostTmp; + m_CABACEstimator->getCtx() = ctxBest; + + currTU.copyComponentFrom(tmpTU, COMPONENT_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf } + } - if (singleCostTmp < dSingleCost) + if ( !earlyExitISP ) + { + // Test using joint chroma residual coding + double bestCostCbCr = bestCostCb + bestCostCr; + Distortion bestDistCbCr = bestDistCb + bestDistCr; + int bestJointCbCr = 0; + bool lastIsBest = false; + std::vector<int> jointCbfMasksToTest; + if ( cs.sps->getJointCbCrEnabledFlag() && (TU::getCbf(tmpTU, COMPONENT_Cb) || TU::getCbf(tmpTU, COMPONENT_Cr))) { - dSingleCost = singleCostTmp; - uiSingleDistLuma = singleDistTmpLuma; - singleFracBits = singleTmpFracBits; + jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, orgResiCb, orgResiCr); + } + for( int cbfMask : jointCbfMasksToTest ) + { + Distortion distTmp = 0; + + currTU.jointCbCr = (uint8_t)cbfMask; + currTU.compAlpha[COMPONENT_Cb] = 0; + currTU.compAlpha[COMPONENT_Cr] = 0; + // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode. 
+ currTU.mtsIdx[COMPONENT_Cb] = currTU.mtsIdx[COMPONENT_Cr] = MTS_DCT2_DCT2; + m_CABACEstimator->getCtx() = ctxStartTU; - bestModeId[COMPONENT_Y] = trModes[modeId].first; - if( trModes[modeId].first == 0 ) + resiCb.copyFrom( orgResiCb[cbfMask] ); + resiCr.copyFrom( orgResiCr[cbfMask] ); + xIntraCodingTUBlock( currTU, COMPONENT_Cb, false, distTmp, 0 ); + + double costTmp = std::numeric_limits<double>::max(); + if( distTmp < std::numeric_limits<Distortion>::max() ) { - cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ); + uint64_t bits = xGetIntraFracBitsQTChroma( currTU, COMPONENT_Cb ); + costTmp = m_pcRdCost->calcRdCost( bits, distTmp ); } - if( bestModeId[COMPONENT_Y] != lastCheckId ) + if( costTmp < bestCostCbCr ) { - saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) ); - saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) ); + bestCostCbCr = costTmp; + bestDistCbCr = distTmp; + bestJointCbCr = currTU.jointCbCr; - if( keepResi ) + // store data + if( cbfMask != jointCbfMasksToTest.back() ) { - saveCS.getResiBuf ( tu.Y() ).copyFrom( csFull->getResiBuf ( tu.Y() ) ); - saveCS.getOrgResiBuf( tu.Y() ).copyFrom( csFull->getOrgResiBuf( tu.Y() ) ); - } +#if KEEP_PRED_AND_RESI_SIGNALS + saveCS.getOrgResiBuf(cbArea).copyFrom(cs.getOrgResiBuf(cbArea)); + saveCS.getOrgResiBuf(crArea).copyFrom(cs.getOrgResiBuf(crArea)); +#endif + saveCS.getPredBuf (cbArea).copyFrom(cs.getPredBuf (cbArea)); + saveCS.getPredBuf (crArea).copyFrom(cs.getPredBuf (crArea)); + if( keepResi ) + { + saveCS.getResiBuf (cbArea).copyFrom(cs.getResiBuf (cbArea)); + saveCS.getResiBuf (crArea).copyFrom(cs.getResiBuf (crArea)); + } + saveCS.getRecoBuf (cbArea).copyFrom(cs.getRecoBuf (cbArea)); + saveCS.getRecoBuf (crArea).copyFrom(cs.getRecoBuf (crArea)); - tmpTU->copyComponentFrom( tu, COMPONENT_Y ); + tmpTU.copyComponentFrom(currTU, COMPONENT_Cb); + tmpTU.copyComponentFrom(currTU, COMPONENT_Cr); - ctxBest = m_CABACEstimator->getCtx(); + ctxBest = 
m_CABACEstimator->getCtx(); + } + else + { + lastIsBest = true; + } } } - } - - if( bestModeId[COMPONENT_Y] != lastCheckId ) - { - csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) ); - csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) ); - if( keepResi ) + // Retrieve the best CU data (unless it was the very last one tested) + if ( !( maxModesTested == 1 && jointCbfMasksToTest.empty() ) && !lastIsBest ) { - csFull->getResiBuf ( tu.Y() ).copyFrom( saveCS.getResiBuf ( tu.Y() ) ); - csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) ); - } +#if KEEP_PRED_AND_RESI_SIGNALS + cs.getPredBuf (cbArea).copyFrom(saveCS.getPredBuf (cbArea)); + cs.getOrgResiBuf(cbArea).copyFrom(saveCS.getOrgResiBuf(cbArea)); + cs.getPredBuf (crArea).copyFrom(saveCS.getPredBuf (crArea)); + cs.getOrgResiBuf(crArea).copyFrom(saveCS.getOrgResiBuf(crArea)); +#endif + cs.getPredBuf (cbArea).copyFrom(saveCS.getPredBuf (cbArea)); + cs.getPredBuf (crArea).copyFrom(saveCS.getPredBuf (crArea)); - tu.copyComponentFrom( *tmpTU, COMPONENT_Y ); + if( keepResi ) + { + cs.getResiBuf (cbArea).copyFrom(saveCS.getResiBuf (cbArea)); + cs.getResiBuf (crArea).copyFrom(saveCS.getResiBuf (crArea)); + } + cs.getRecoBuf (cbArea).copyFrom(saveCS.getRecoBuf (cbArea)); + cs.getRecoBuf (crArea).copyFrom(saveCS.getRecoBuf (crArea)); + + currTU.copyComponentFrom(tmpTU, COMPONENT_Cb); + currTU.copyComponentFrom(tmpTU, COMPONENT_Cr); - if( !bCheckSplit ) - { m_CABACEstimator->getCtx() = ctxBest; } - } - else if( bCheckSplit ) - { - ctxBest = m_CABACEstimator->getCtx(); - } - csFull->cost += dSingleCost; - csFull->dist += uiSingleDistLuma; - csFull->fracBits += singleFracBits; - } + // Copy results to the picture structures + cs.picture->getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea)); + cs.picture->getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea)); + cs.picture->getPredBuf(cbArea).copyFrom(cs.getPredBuf(cbArea)); + 
cs.picture->getPredBuf(crArea).copyFrom(cs.getPredBuf(crArea)); - if( bCheckSplit ) - { - //----- store full entropy coding status, load original entropy coding status ----- - if( bCheckFull ) - { - m_CABACEstimator->getCtx() = ctxStart; + cbfs.cbf(COMPONENT_Cb) = TU::getCbf(currTU, COMPONENT_Cb); + cbfs.cbf(COMPONENT_Cr) = TU::getCbf(currTU, COMPONENT_Cr); + + currTU.jointCbCr = ( (cbfs.cbf(COMPONENT_Cb) + cbfs.cbf(COMPONENT_Cr)) ? bestJointCbCr : 0 ); + cs.dist += bestDistCbCr; } - //----- code splitted block ----- - csSplit->cost = 0; + } + else + { + unsigned numValidTBlocks = ::getNumberValidTBlocks( *cs.pcv ); + ChromaCbfs SplitCbfs ( false ); - bool uiSplitCbfLuma = false; - bool splitIsSelected = true; if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) { partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); } - - if( cu.ispMode ) + else if( currTU.cu->ispMode ) { - partitioner.splitCurrArea( ispType, *csSplit ); + partitioner.splitCurrArea( ispType, cs ); } + else + THROW( "Implicit TU split not available" ); + do { - xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType ); - subTuCounter += subTuCounter != -1 ? 
1 : 0; + ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType ); - if( !cu.ispMode ) - { - csSplit->setDecomp( partitioner.currArea().Y() ); - } - else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) ) + for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ ) { - csSplit->setDecomp( cu.Y() ); + const ComponentID compID = ComponentID( ch ); + SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID ); } + } while( partitioner.nextPart( cs ) ); - uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth ); - if( cu.ispMode ) + partitioner.exitCurrSplit(); + + if( lumaUsesISP && cs.dist == MAX_UINT ) + { + return cbfs; + } + { + + cbfs.Cb |= SplitCbfs.Cb; + cbfs.Cr |= SplitCbfs.Cr; + + if( !lumaUsesISP ) { - //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance) - if( csSplit->cost > bestCostSoFar ) - { - earlySkipISP = true; - splitIsSelected = false; - break; - } - else + for( auto &ptu : cs.tus ) { - //more restrictive exit condition - bool tuIsDividedInRows = CU::divideTuInRows( cu ); - int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> g_aucLog2[cu.firstTU->lheight()] : cu.lwidth() >> g_aucLog2[cu.firstTU->lwidth()]; - double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 
0.83 : 0.91; - if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold ) + if( currArea.Cb().contains( ptu->Cb() ) || ( !ptu->Cb().valid() && currArea.Y().contains( ptu->Y() ) ) ) { - earlySkipISP = true; - splitIsSelected = false; - break; + TU::setCbfAtDepth( *ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb ); + TU::setCbfAtDepth( *ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr ); } } } + } + } + return cbfs; +} +uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType) +{ + uint32_t orgMode = uiMode; - } while( partitioner.nextPart( *csSplit ) ); + if (!pu.ciipFlag) + std::swap(orgMode, pu.intraDir[chType]); - partitioner.exitCurrSplit(); + m_CABACEstimator->resetBits(); - if( splitIsSelected ) + if( isLuma( chType ) ) + { + if (!pu.ciipFlag) { - for( auto &ptu : csSplit->tus ) - { - if( currArea.Y().contains( ptu->Y() ) ) - { - TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 ); - } - } + m_CABACEstimator->intra_luma_pred_mode(pu); + } + } + else + { + m_CABACEstimator->intra_chroma_pred_mode( pu ); + } - //----- restore context states ----- - m_CABACEstimator->getCtx() = ctxStart; + if ( !pu.ciipFlag ) + std::swap(orgMode, pu.intraDir[chType]); - //----- determine rate and r-d cost ----- - csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 
0 : -1, ispType ); + return m_CABACEstimator->getEstFracBits(); +} - //--- update cost --- - csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist); - } +void IntraSearch::sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum) +{ + if (candNum == 0) + { + rdModeList[0] = mode; + rdCostList[0] = cost; + bdpcmModeList[0] = bdpcmMode; + candNum++; + return; } - if( csFull || csSplit ) + int insertPos = -1; + for (int pos = candNum - 1; pos >= 0; pos--) { + if (cost < rdCostList[pos]) { - // otherwise this would've happened in useSubStructure - cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) ); - cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) ); + insertPos = pos; } + } - if( cu.ispMode && earlySkipISP ) - { - cs.cost = MAX_DOUBLE; - } - else + if (insertPos >= 0) + { + for (int i = candNum - 1; i >= insertPos; i--) { - cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist ); + rdModeList[i + 1] = rdModeList[i]; + rdCostList[i + 1] = rdCostList[i]; + bdpcmModeList[i + 1] = bdpcmModeList[i]; } + rdModeList[insertPos] = mode; + rdCostList[insertPos] = cost; + bdpcmModeList[insertPos] = bdpcmMode; + candNum++; + } + else + { + rdModeList[candNum] = mode; + rdCostList[candNum] = cost; + bdpcmModeList[candNum] = bdpcmMode; + candNum++; } -} - -ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType ) -{ - UnitArea currArea = partitioner.currArea(); - const bool keepResi = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS; - if( !currArea.Cb().valid() ) return ChromaCbfs( false ); + CHECK(candNum > FAST_UDI_MAX_RDMODE_NUM, "exceed intra mode candidate list capacity"); - TransformUnit &currTU = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); - const PredictionUnit &pu = *cs.getPU( 
currArea.chromaPos(), CHANNEL_TYPE_CHROMA ); + return; +} - bool lumaUsesISP = !CS::isDualITree( cs ) && currTU.cu->ispMode; - uint32_t currDepth = partitioner.currTrDepth; - const PPS &pps = *cs.pps; - ChromaCbfs cbfs ( false ); +void IntraSearch::invalidateBestRdModeFirstColorSpace() +{ + int numSaveRdClass = 4 * NUM_LFNST_NUM_PER_SET * 2; + int savedRdModeListSize = FAST_UDI_MAX_RDMODE_NUM; - if (currDepth == currTU.depth) + for (int i = 0; i < numSaveRdClass; i++) { - if (!currArea.Cb().valid() || !currArea.Cr().valid()) + m_numSavedRdModeFirstColorSpace[i] = 0; + for (int j = 0; j < savedRdModeListSize; j++) { - return cbfs; + m_savedRdModeFirstColorSpace[i][j] = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0); + m_savedBDPCMModeFirstColorSpace[i][j] = 0; + m_savedRdCostFirstColorSpace[i][j] = MAX_DOUBLE; } + } +} +template<typename T, size_t N> +void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip) +{ + const int maxCandPerType = numModesForFullRD >> 1; + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList; + const double minCost = candCostList[0]; + bool keepOneMip = candModeList.size() > numModesForFullRD; + + int numConv = 0; + int numMip = 0; + for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++) + { + bool addMode = false; + const ModeInfo& orgMode = candModeList[idx]; - CodingStructure &saveCS = *m_pSaveCS[1]; - saveCS.pcv = cs.pcv; - saveCS.picture = cs.picture; - saveCS.area.repositionTo( cs.area ); - saveCS.initStructData( MAX_INT, false, true ); - - if( !CS::isDualITree( cs ) && currTU.cu->ispMode ) + if (!orgMode.mipFlg) { - saveCS.clearCUs(); - CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType ); - auxCU.ispMode = currTU.cu->ispMode; - saveCS.sps = 
currTU.cs->sps; - saveCS.clearPUs(); - saveCS.addPU( *currTU.cu->firstPU, partitioner.chType ); + addMode = (numConv < 3); + numConv += addMode ? 1:0; } + else + { + addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip ); + keepOneMip = false; + numMip += addMode ? 1:0; + } + if( addMode ) + { + tempRdModeList.push_back(orgMode); + tempCandCostList.push_back(candCostList[idx]); + } + } - TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType); + if ((pu.lwidth() > 8 && pu.lheight() > 8)) + { + // Sort MIP candidates by Hadamard cost + const int transpOff = getNumModesMip( pu.Y() ); + static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0); + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0); + for( uint8_t mode : { 0, 1, 2 } ) + { + uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0); + updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3); + } + // Append MIP mode to RD mode list + const int modeListSize = int(tempRdModeList.size()); + for (int idx = 0; idx < 3; idx++) + { + const bool isTransposed = (sortedMipModes[idx] >= transpOff ? true : false); + const uint32_t mipIdx = (isTransposed ? 
sortedMipModes[idx] - transpOff : sortedMipModes[idx]); + const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx ); + bool alreadyIncluded = false; + for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++) + { + if (tempRdModeList[modeListIdx] == mipMode) + { + alreadyIncluded = true; + break; + } + } - cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video) + if (!alreadyIncluded) + { + tempRdModeList.push_back(mipMode); + tempCandCostList.push_back(0); + if( fastMip ) break; + } + } + } - const unsigned numTBlocks = ::getNumberValidTBlocks( *cs.pcv ); + candModeList = tempRdModeList; + candCostList = tempCandCostList; + numModesForFullRD = int(candModeList.size()); +} - for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++) - { - const ComponentID compID = ComponentID(c); - const CompArea& area = currTU.blocks[compID]; +// It decides which modes from the ISP lists can be full RD tested +void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize) +{ + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>* rdModeLists[2] = { &m_ispCandListHor, &m_ispCandListVer }; - double dSingleCost = MAX_DOUBLE; - int bestModeId = 0; - Distortion singleDistC = 0; - Distortion singleDistCTmp = 0; - double singleCostTmp = 0; + const int curIspLfnstIdx = m_curIspLfnstIdx; + if (curIspLfnstIdx >= NUM_LFNST_NUM_PER_SET) + { + //All lfnst indices have been checked + return; + } - const bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y ); + ISPType nextISPcandSplitType; + auto& ispTestedModes = m_ispTestedModes[curIspLfnstIdx]; + const bool horSplitIsTerminated = ispTestedModes.splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1]; + const bool verSplitIsTerminated = ispTestedModes.splitIsFinished[VER_INTRA_SUBPARTITIONS - 1]; + if (!horSplitIsTerminated && 
!verSplitIsTerminated) + { + nextISPcandSplitType = !lastMode ? HOR_INTRA_SUBPARTITIONS : lastMode->ispMod == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; + } + else if (!horSplitIsTerminated && verSplitIsTerminated) + { + nextISPcandSplitType = HOR_INTRA_SUBPARTITIONS; + } + else if (horSplitIsTerminated && !verSplitIsTerminated) + { + nextISPcandSplitType = VER_INTRA_SUBPARTITIONS; + } + else + { + xFinishISPModes(); + return; // no more modes will be tested + } - const int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1; - const int totalModesToTest = crossCPredictionModesToTest; - const bool isOneMode = (totalModesToTest == 1); + int maxNumSubPartitions = ispTestedModes.numTotalParts[nextISPcandSplitType - 1]; - int currModeId = 0; - int default0Save1Load2 = 0; + // We try to break the split here for lfnst > 0 according to the first mode + if (curIspLfnstIdx > 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 1) + { + int firstModeThisSplit = ispTestedModes.getTestedIntraMode(nextISPcandSplitType, 0); + int numSubPartsFirstModeThisSplit = ispTestedModes.getNumCompletedSubParts(nextISPcandSplitType, firstModeThisSplit); + CHECK(numSubPartsFirstModeThisSplit < 0, "wrong number of subpartitions!"); + bool stopThisSplit = false; + bool stopThisSplitAllLfnsts = false; + if (numSubPartsFirstModeThisSplit < maxNumSubPartitions) + { + stopThisSplit = true; + if (m_pcEncCfg->getUseFastISP() && curIspLfnstIdx == 1 && numSubPartsFirstModeThisSplit < maxNumSubPartitions - 1) + { + stopThisSplitAllLfnsts = true; + } + } - TempCtx ctxStart ( m_CtxCache ); - TempCtx ctxBest ( m_CtxCache ); + if (stopThisSplit) + { + ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true; + if (curIspLfnstIdx == 1 && stopThisSplitAllLfnsts) + { + m_ispTestedModes[2].splitIsFinished[nextISPcandSplitType - 1] = true; + } + return; + } + } - if (!isOneMode) + // We try to break the split here for lfnst = 0 or all lfnst 
indices according to the first two modes + if (curIspLfnstIdx == 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 2) + { + // Split stop criteria after checking the performance of previously tested intra modes + const int thresholdSplit1 = maxNumSubPartitions; + bool stopThisSplit = false; + bool stopThisSplitForAllLFNSTs = false; + const int thresholdSplit1ForAllLFNSTs = maxNumSubPartitions - 1; + + int mode1 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0); + mode1 = mode1 == DC_IDX ? -1 : mode1; + int numSubPartsBestMode1 = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1; + int mode2 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1); + mode2 = mode2 == DC_IDX ? -1 : mode2; + int numSubPartsBestMode2 = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1; + + // 1) The 2 most promising modes do not reach a certain number of sub-partitions + if (numSubPartsBestMode1 != -1 && numSubPartsBestMode2 != -1) + { + if (numSubPartsBestMode1 < thresholdSplit1 && numSubPartsBestMode2 < thresholdSplit1) { - ctxStart = m_CABACEstimator->getCtx(); + stopThisSplit = true; + if (curIspLfnstIdx == 0 && numSubPartsBestMode1 < thresholdSplit1ForAllLFNSTs && numSubPartsBestMode2 < thresholdSplit1ForAllLFNSTs) + { + stopThisSplitForAllLFNSTs = true; + } } - + else { - for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++) + //we stop also if the cost is MAX_DOUBLE for both modes + double mode1Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode1); + double mode2Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode2); + if (!(mode1Cost < MAX_DOUBLE || mode2Cost < MAX_DOUBLE)) { - currTU.compAlpha [compID] = 0; - - currModeId++; - - const bool isFirstMode = (currModeId == 1); - const bool isLastMode = (currModeId == totalModesToTest); // currModeId is indexed from 1 - - 
if (isOneMode) - { - default0Save1Load2 = 0; - } - else if (!isOneMode && (crossCPredictionModeId == 0)) - { - default0Save1Load2 = 1; //save prediction on first mode - } - else - { - default0Save1Load2 = 2; //load it on subsequent modes - } - - if (!isFirstMode) // if not first mode to be tested - { - m_CABACEstimator->getCtx() = ctxStart; - } - - singleDistCTmp = 0; - - xIntraCodingTUBlock( currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2 ); + stopThisSplit = true; + } + } + } - if( ( ( crossCPredictionModeId == 1 ) && ( currTU.compAlpha[compID] == 0 ) ) ) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. - { - singleCostTmp = MAX_DOUBLE; - } - else if( lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb ) + if (!stopThisSplit) + { + // 2) One split type may be discarded by comparing the number of sub-partitions of the best angle modes of both splits + ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; + int numSubPartsBestMode2OtherSplit = mode2 != -1 ? 
ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1; + if (numSubPartsBestMode2OtherSplit != -1 && numSubPartsBestMode2 != -1 && ispTestedModes.bestSplitSoFar != nextISPcandSplitType) + { + if (numSubPartsBestMode2OtherSplit > numSubPartsBestMode2) + { + stopThisSplit = true; + } + // both have the same number of subpartitions + else if (numSubPartsBestMode2OtherSplit == numSubPartsBestMode2) + { + // both have the maximum number of subpartitions, so it compares RD costs to decide + if (numSubPartsBestMode2OtherSplit == maxNumSubPartitions) { - uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent( cs, partitioner, ComponentID( c ) ); - singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp ); - if( isOneMode || ( !isOneMode && !isLastMode ) ) + double rdCostBestMode2ThisSplit = ispTestedModes.getRDCost(nextISPcandSplitType, mode2); + double rdCostBestMode2OtherSplit = ispTestedModes.getRDCost(otherSplit, mode2); + double threshold = 1.3; + if (rdCostBestMode2ThisSplit == MAX_DOUBLE || rdCostBestMode2OtherSplit < rdCostBestMode2ThisSplit * threshold) { - m_CABACEstimator->getCtx() = ctxStart; + stopThisSplit = true; } } - else if( !isOneMode ) - { - uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma( currTU, compID ); - singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp ); - } - - if( singleCostTmp < dSingleCost ) + else // none of them reached the maximum number of subpartitions with the best angle modes, so it compares the results with the the planar mode { - dSingleCost = singleCostTmp; - singleDistC = singleDistCTmp; - bestModeId = currModeId; - - if( !isLastMode ) + int numSubPartsBestMode1OtherSplit = mode1 != -1 ? 
ispTestedModes.getNumCompletedSubParts(otherSplit, mode1) : -1; + if (numSubPartsBestMode1OtherSplit != -1 && numSubPartsBestMode1 != -1 && numSubPartsBestMode1OtherSplit > numSubPartsBestMode1) { -#if KEEP_PRED_AND_RESI_SIGNALS - saveCS.getPredBuf (area).copyFrom(cs.getPredBuf (area)); - saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area)); -#endif - saveCS.getPredBuf (area).copyFrom(cs.getPredBuf (area)); - if( keepResi ) - { - saveCS.getResiBuf (area).copyFrom(cs.getResiBuf (area)); - } - saveCS.getRecoBuf (area).copyFrom(cs.getRecoBuf (area)); - - tmpTU.copyComponentFrom(currTU, compID); - - ctxBest = m_CABACEstimator->getCtx(); + stopThisSplit = true; } } } } - - if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb ) + } + if (stopThisSplit) + { + ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true; + if (stopThisSplitForAllLFNSTs) { - //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr - cs.dist = MAX_UINT; - m_CABACEstimator->getCtx() = ctxStart; - break; - //return cbfs; + for (int lfnstIdx = 1; lfnstIdx < NUM_LFNST_NUM_PER_SET; lfnstIdx++) + { + m_ispTestedModes[lfnstIdx].splitIsFinished[nextISPcandSplitType - 1] = true; + } } + return; + } + } + + // Now a new mode is retrieved from the list and it has to be decided whether it should be tested or not + if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size()) + { + ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(ispTestedModes.candIndexInList[nextISPcandSplitType - 1]); + ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++; - if (bestModeId < totalModesToTest) + // extra modes are only tested if ISP has won so far + if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > ispTestedModes.numOrigModesToTest) + { + if (ispTestedModes.bestSplitSoFar != candidate.ispMod || ispTestedModes.bestModeSoFar == PLANAR_IDX) { -#if KEEP_PRED_AND_RESI_SIGNALS - cs.getPredBuf 
(area).copyFrom(saveCS.getPredBuf (area)); - cs.getOrgResiBuf(area).copyFrom(saveCS.getOrgResiBuf(area)); -#endif - cs.getPredBuf (area).copyFrom(saveCS.getPredBuf (area)); - if( keepResi ) - { - cs.getResiBuf (area).copyFrom(saveCS.getResiBuf (area)); - } - cs.getRecoBuf (area).copyFrom(saveCS.getRecoBuf (area)); + ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true; + return; + } + } - currTU.copyComponentFrom(tmpTU, compID); + bool testCandidate = true; - m_CABACEstimator->getCtx() = ctxBest; + // we look for a reference mode that has already been tested within the window and decide to test the new one according to the reference mode costs + if (maxNumSubPartitions > 2 && (curIspLfnstIdx > 0 || (candidate.modeId >= DC_IDX && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2))) + { + int refLfnstIdx = -1; + const int angWindowSize = 5; + int numSubPartsLeftMode, numSubPartsRightMode, numSubPartsRefMode, leftIntraMode = -1, rightIntraMode = -1; + int windowSize = candidate.modeId > DC_IDX ? angWindowSize : 1; + int numSamples = cuSize.width << floorLog2(cuSize.height); + int numSubPartsLimit = numSamples >= 256 ? maxNumSubPartitions - 1 : 2; + + xFindAlreadyTestedNearbyIntraModes(curIspLfnstIdx, (int)candidate.modeId, &refLfnstIdx, &leftIntraMode, &rightIntraMode, (ISPType)candidate.ispMod, windowSize); + + if (refLfnstIdx != -1 && refLfnstIdx != curIspLfnstIdx) + { + CHECK(leftIntraMode != candidate.modeId || rightIntraMode != candidate.modeId, "wrong intra mode and lfnstIdx values!"); + numSubPartsRefMode = m_ispTestedModes[refLfnstIdx].getNumCompletedSubParts((ISPType)candidate.ispMod, candidate.modeId); + CHECK(numSubPartsRefMode <= 0, "Wrong value of the number of subpartitions completed!"); } + else + { + numSubPartsLeftMode = leftIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1; + numSubPartsRightMode = rightIntraMode != -1 ? 
ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1; - cs.picture->getPredBuf(area).copyFrom(cs.getPredBuf(area)); - cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area)); + numSubPartsRefMode = std::max(numSubPartsLeftMode, numSubPartsRightMode); + } - cbfs.cbf(compID) = TU::getCbf(currTU, compID); + if (numSubPartsRefMode > 0) + { + // The mode was found. Now we check the condition + testCandidate = numSubPartsRefMode > numSubPartsLimit; + } + } - cs.dist += singleDistC; + if (testCandidate) + { + modeInfo = candidate; } } else { - unsigned numValidTBlocks = ::getNumberValidTBlocks( *cs.pcv ); - ChromaCbfs SplitCbfs ( false ); + //the end of the list was reached, so the split is invalidated + ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true; + } +} - if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) +void IntraSearch::xFindAlreadyTestedNearbyIntraModes(int lfnstIdx, int currentIntraMode, int* refLfnstIdx, int* leftIntraMode, int* rightIntraMode, ISPType ispOption, int windowSize) +{ + bool leftModeFound = false, rightModeFound = false; + *leftIntraMode = -1; + *rightIntraMode = -1; + *refLfnstIdx = -1; + const unsigned st = ispOption - 1; + + //first we check if the exact intra mode was already tested for another lfnstIdx value + if (lfnstIdx > 0) + { + bool sameIntraModeFound = false; + if (lfnstIdx == 2 && m_ispTestedModes[1].modeHasBeenTested[currentIntraMode][st]) { - partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); + sameIntraModeFound = true; + *refLfnstIdx = 1; } - else if( currTU.cu->ispMode ) + else if (m_ispTestedModes[0].modeHasBeenTested[currentIntraMode][st]) { - partitioner.splitCurrArea( ispType, cs ); + sameIntraModeFound = true; + *refLfnstIdx = 0; } - else - THROW( "Implicit TU split not available" ); - do + if (sameIntraModeFound) { - ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType ); - - for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ ) 
- { - const ComponentID compID = ComponentID( ch ); - SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID ); - } - } while( partitioner.nextPart( cs ) ); + *leftIntraMode = currentIntraMode; + *rightIntraMode = currentIntraMode; + return; + } + } - partitioner.exitCurrSplit(); + //The mode has not been checked for another lfnstIdx value, so now we look for a similar mode within a window using the same lfnstIdx + for (int k = 1; k <= windowSize; k++) + { + int off = currentIntraMode - 2 - k; + int leftMode = (off < 0) ? NUM_LUMA_MODE + off : currentIntraMode - k; + int rightMode = currentIntraMode > DC_IDX ? (((int)currentIntraMode - 2 + k) % 65) + 2 : PLANAR_IDX; - if( lumaUsesISP && cs.dist == MAX_UINT ) + leftModeFound = leftMode != (int)currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[leftMode][st] : false; + rightModeFound = rightMode != (int)currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[rightMode][st] : false; + if (leftModeFound || rightModeFound) { - return cbfs; + *leftIntraMode = leftModeFound ? leftMode : -1; + *rightIntraMode = rightModeFound ? 
rightMode : -1; + *refLfnstIdx = lfnstIdx; + break; } - { + } +} - cbfs.Cb |= SplitCbfs.Cb; - cbfs.Cr |= SplitCbfs.Cr; +//It prepares the list of potential intra modes candidates that will be tested using RD costs +bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, ModeInfo bestNonISPMode) +{ + int bestISPModeInRelCU = -1; + m_modeCtrl->setStopNonDCT2Transforms(false); - if( !lumaUsesISP ) + if (m_pcEncCfg->getUseFastISP()) + { + //we check if the ISP tests can be cancelled + double thSkipISP = 1.4; + if (bestNonISPCost > bestCostSoFar * thSkipISP) + { + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) { - for( auto &ptu : cs.tus ) + for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++) { - if( currArea.Cb().contains( ptu->Cb() ) || ( !ptu->Cb().valid() && currArea.Y().contains( ptu->Y() ) ) ) - { - TU::setCbfAtDepth( *ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb ); - TU::setCbfAtDepth( *ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr ); - } + m_ispTestedModes[j].splitIsFinished[splitIdx] = true; } } + return false; + } + if (!updateISPStatusFromRelCU(bestNonISPCost, bestNonISPMode, bestISPModeInRelCU)) + { + return false; } } - return cbfs; -} + for (int k = 0; k < m_ispCandListHor.size(); k++) + { + m_ispCandListHor.at(k).ispMod = HOR_INTRA_SUBPARTITIONS; //we set the correct ISP split type value + } -uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType) -{ - uint32_t orgMode = uiMode; + auto origHadList = m_ispCandListHor; // save the original hadamard list of regular intra + bool modeIsInList[NUM_LUMA_MODE] = { false }; - if (!pu.mhIntraFlag) - std::swap(orgMode, pu.intraDir[chType]); + m_ispCandListHor.clear(); + m_ispCandListVer.clear(); - m_CABACEstimator->resetBits(); + // we sort the normal intra modes according to their full RD costs + std::sort(m_regIntraRDListWithCosts.begin(), m_regIntraRDListWithCosts.end(), 
ModeInfoWithCost::compareModeInfoWithCost); - if( isLuma( chType ) ) + // we get the best angle from the regular intra list + int bestNormalIntraAngle = -1; + for (int modeIdx = 0; modeIdx < m_regIntraRDListWithCosts.size(); modeIdx++) { - if ( pu.mhIntraFlag ) - m_CABACEstimator->MHIntra_luma_pred_modes(*pu.cu); - else + if (bestNormalIntraAngle == -1 && m_regIntraRDListWithCosts.at(modeIdx).modeId > DC_IDX) { - m_CABACEstimator->extend_ref_line(pu); - m_CABACEstimator->intra_luma_pred_mode(pu); + bestNormalIntraAngle = m_regIntraRDListWithCosts.at(modeIdx).modeId; + break; } } - else + + int mode1 = PLANAR_IDX; + int mode2 = bestNormalIntraAngle; + + ModeInfo refMode = origHadList.at(0); + auto* destListPtr = &m_ispCandListHor; + //List creation + + if (m_pcEncCfg->getUseFastISP() && bestISPModeInRelCU != -1) //RelCU intra mode { - m_CABACEstimator->intra_chroma_pred_mode( pu ); + destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, bestISPModeInRelCU)); + modeIsInList[bestISPModeInRelCU] = true; } - if ( !pu.mhIntraFlag ) - std::swap(orgMode, pu.intraDir[chType]); + // Planar + if (!modeIsInList[mode1]) + { + destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1)); + modeIsInList[mode1] = true; + } + // Best angle in regular intra + if (mode2 != -1 && !modeIsInList[mode2]) + { + destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode2)); + modeIsInList[mode2] = true; + } + // Remaining regular intra modes that were full RD tested (except DC, which is added after the angles from regular intra) + int dcModeIndex = -1; + for (int remModeIdx = 0; remModeIdx < m_regIntraRDListWithCosts.size(); remModeIdx++) + { + int currentMode = m_regIntraRDListWithCosts.at(remModeIdx).modeId; + if (currentMode != mode1 && currentMode != mode2 && !modeIsInList[currentMode]) + { + if (currentMode > DC_IDX) + { + 
destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, currentMode)); + modeIsInList[currentMode] = true; + } + else if (currentMode == DC_IDX) + { + dcModeIndex = remModeIdx; + } + } + } - return m_CABACEstimator->getEstFracBits(); -} + // DC is added after the angles from regular intra + if (dcModeIndex != -1 && !modeIsInList[DC_IDX]) + { + destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, DC_IDX)); + modeIsInList[DC_IDX] = true; + } + // We add extra candidates to the list that will only be tested if ISP is likely to win + for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++) + { + m_ispTestedModes[j].numOrigModesToTest = (int)destListPtr->size(); + } + const int addedModesFromHadList = 3; + int newModesAdded = 0; + for (int k = 0; k < origHadList.size(); k++) + { + if (newModesAdded == addedModesFromHadList) + { + break; + } + if (!modeIsInList[origHadList.at(k).modeId]) + { + destListPtr->push_back( ModeInfo( refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, origHadList.at(k).modeId ) ); + newModesAdded++; + } + } -void IntraSearch::encPredIntraDPCM( const ComponentID &compID, PelBuf &pOrg, PelBuf &pDst, const uint32_t &uiDirMode ) -{ - CHECK( pOrg.buf == 0, "Encoder DPCM called without original buffer" ); + if (m_pcEncCfg->getUseFastISP() && bestISPModeInRelCU != -1) + { + destListPtr->resize(1); + } - const int srcStride = m_topRefLength + 1; - CPelBuf pSrc = CPelBuf(getPredictorPtr(compID), srcStride, m_leftRefLength + 1); + // Copy modes to other split-type list + m_ispCandListVer = m_ispCandListHor; + for (int i = 0; i < m_ispCandListVer.size(); i++) + { + m_ispCandListVer[i].ispMod = VER_INTRA_SUBPARTITIONS; + } - // Sample Adaptive intra-Prediction (SAP) - if( uiDirMode == HOR_IDX ) + // Reset the tested modes information to 0 + for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++) { - // left column filled with reference samples, remaining columns filled with 
pOrg data - for( int y = 0; y < pDst.height; y++ ) + for (int i = 0; i < m_ispCandListHor.size(); i++) { - pDst.at( 0, y ) = pSrc.at( 0, 1 + y ); + m_ispTestedModes[j].clearISPModeInfo(m_ispCandListHor[i].modeId); } - CPelBuf orgRest = pOrg.subBuf( 0, 0, pOrg.width - 1, pOrg.height ); - PelBuf predRest = pDst.subBuf( 1, 0, pDst.width - 1, pDst.height ); + } + return true; +} + +void IntraSearch::xSortISPCandListLFNST() +{ + //It resorts the list of intra mode candidates for lfnstIdx > 0 by checking the RD costs for lfnstIdx = 0 + ISPTestedModesInfo& ispTestedModesRef = m_ispTestedModes[0]; + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) + { + ISPType ispMode = splitIdx ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS; + if (!m_ispTestedModes[m_curIspLfnstIdx].splitIsFinished[splitIdx] && ispTestedModesRef.testedModes[splitIdx].size() > 1) + { + auto& candList = ispMode == HOR_INTRA_SUBPARTITIONS ? m_ispCandListHor : m_ispCandListVer; + int bestModeId = candList[1].modeId > DC_IDX ? candList[1].modeId : -1; + int bestSubParts = candList[1].modeId > DC_IDX ? ispTestedModesRef.getNumCompletedSubParts(ispMode, bestModeId) : -1; + double bestCost = candList[1].modeId > DC_IDX ? 
ispTestedModesRef.getRDCost(ispMode, bestModeId) : MAX_DOUBLE; + for (int i = 0; i < candList.size(); i++) + { + const int candSubParts = ispTestedModesRef.getNumCompletedSubParts(ispMode, candList[i].modeId); + const double candCost = ispTestedModesRef.getRDCost(ispMode, candList[i].modeId); + if (candSubParts > bestSubParts || candCost < bestCost) + { + bestModeId = candList[i].modeId; + bestCost = candCost; + bestSubParts = candSubParts; + } + } - predRest.copyFrom( orgRest ); + if (bestModeId != -1) + { + if (bestModeId != candList[0].modeId) + { + auto prevMode = candList[0]; + candList[0].modeId = bestModeId; + for (int i = 1; i < candList.size(); i++) + { + auto nextMode = candList[i]; + candList[i] = prevMode; + if (nextMode.modeId == bestModeId) + { + break; + } + prevMode = nextMode; + } + } + } + } } - else // VER_IDX +} + +bool IntraSearch::updateISPStatusFromRelCU( double bestNonISPCostCurrCu, ModeInfo bestNonISPModeCurrCu, int& bestISPModeInRelCU ) +{ + //It compares the data of a related CU with the current CU to cancel or reduce the ISP tests + bestISPModeInRelCU = -1; + if (m_modeCtrl->getRelatedCuIsValid()) { - // top row filled with reference samples, remaining rows filled with pOrg data - for( int x = 0; x < pDst.width; x++ ) + double bestNonISPCostRelCU = m_modeCtrl->getBestDCT2NonISPCostRelCU(); + double costRatio = bestNonISPCostCurrCu / bestNonISPCostRelCU; + bool bestModeRelCuIsMip = (m_modeCtrl->getIspPredModeValRelCU() >> 5) & 0x1; + bool bestModeCurrCuIsMip = bestNonISPModeCurrCu.mipFlg; + int relatedCuIntraMode = m_modeCtrl->getIspPredModeValRelCU() >> 9; + bool isSameTypeOfMode = (bestModeRelCuIsMip && bestModeCurrCuIsMip) || (!bestModeRelCuIsMip && !bestModeCurrCuIsMip); + bool bothModesAreAngular = bestNonISPModeCurrCu.modeId > DC_IDX && relatedCuIntraMode > DC_IDX; + bool modesAreComparable = isSameTypeOfMode && (bestModeCurrCuIsMip || bestNonISPModeCurrCu.modeId == relatedCuIntraMode || (bothModesAreAngular && 
abs(relatedCuIntraMode - (int)bestNonISPModeCurrCu.modeId) <= 5)); + int status = m_modeCtrl->getIspPredModeValRelCU(); + + if ((status & 0x3) == 0x3) //ISP was not selected in the relCU { - pDst.at( x, 0 ) = pSrc.at( 1 + x, 0 ); + double bestNonDCT2Cost = m_modeCtrl->getBestNonDCT2Cost(); + double ratioWithNonDCT2 = bestNonDCT2Cost / bestNonISPCostRelCU; + double margin = ratioWithNonDCT2 < 0.95 ? 0.2 : 0.1; + + if (costRatio > 1 - margin && costRatio < 1 + margin && modesAreComparable) + { + for (int lfnstVal = 0; lfnstVal < NUM_LFNST_NUM_PER_SET; lfnstVal++) + { + m_ispTestedModes[lfnstVal].splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1] = true; + m_ispTestedModes[lfnstVal].splitIsFinished[VER_INTRA_SUBPARTITIONS - 1] = true; + } + return false; + } } - CPelBuf orgRest = pOrg.subBuf( 0, 0, pOrg.width, pOrg.height - 1 ); - PelBuf predRest = pDst.subBuf( 0, 1, pDst.width, pDst.height - 1 ); + else if ((status & 0x3) == 0x1) //ISP was selected in the relCU + { + double margin = 0.05; + + if (costRatio > 1 - margin && costRatio < 1 + margin && modesAreComparable) + { + int ispSplitIdx = (m_modeCtrl->getIspPredModeValRelCU() >> 2) & 0x1; + bool lfnstIdxIsNot0 = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 3) & 0x1); + bool lfnstIdxIs2 = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 4) & 0x1); + int lfnstIdx = !lfnstIdxIsNot0 ? 0 : lfnstIdxIs2 ? 
2 : 1; + bestISPModeInRelCU = (int)m_modeCtrl->getBestISPIntraModeRelCU(); + + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) + { + for (int lfnstVal = 0; lfnstVal < NUM_LFNST_NUM_PER_SET; lfnstVal++) + { + if (lfnstVal == lfnstIdx && splitIdx == ispSplitIdx) + { + continue; + } + m_ispTestedModes[lfnstVal].splitIsFinished[splitIdx] = true; + } + } - predRest.copyFrom( orgRest ); + bool stopNonDCT2Transforms = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 6) & 0x1); + m_modeCtrl->setStopNonDCT2Transforms(stopNonDCT2Transforms); + } + } + else + { + THROW("Wrong ISP relCU status"); + } } + + return true; } -bool IntraSearch::useDPCMForFirstPassIntraEstimation( const PredictionUnit &pu, const uint32_t &uiDirMode ) +void IntraSearch::xFinishISPModes() { - return CU::isRDPCMEnabled( *pu.cu ) && pu.cu->transQuantBypass && (uiDirMode == HOR_IDX || uiDirMode == VER_IDX); + //Continue to the next lfnst index + m_curIspLfnstIdx++; + + if (m_curIspLfnstIdx < NUM_LFNST_NUM_PER_SET) + { + //Check if LFNST is applicable + if (m_curIspLfnstIdx == 1) + { + bool canTestLFNST = false; + for (int lfnstIdx = 1; lfnstIdx < NUM_LFNST_NUM_PER_SET; lfnstIdx++) + { + canTestLFNST |= !m_ispTestedModes[lfnstIdx].splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1] || !m_ispTestedModes[lfnstIdx].splitIsFinished[VER_INTRA_SUBPARTITIONS - 1]; + } + if (canTestLFNST) + { + //Construct the intra modes candidates list for the lfnst > 0 cases + xSortISPCandListLFNST(); + } + } + } } + diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 6350f8076c3f9767b35c686a37e75a35edac4900..72ccaa02148956bceded9d45694cbe2c7c58baf5 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -56,9 +56,119 @@ // ==================================================================================================================== // Class definition // ==================================================================================================================== - class EncModeCtrl; +enum PLTScanMode +{ + PLT_SCAN_HORTRAV = 0, + PLT_SCAN_VERTRAV = 1, + NUM_PLT_SCAN = 2 +}; +class SortingElement +{ +public: + inline bool operator<(const SortingElement &other) const + { + return cnt > other.cnt; + } + SortingElement() { + cnt = shift = lastCnt = 0; + data[0] = data[1] = data[2] = 0; + sumData[0] = sumData[1] = sumData[2] = 0; + } + uint32_t getCnt() const { return cnt; } + void setCnt(uint32_t val) { cnt = val; } + int getSumData (int id) const { return sumData[id]; } + + void resetAll(ComponentID compBegin, uint32_t numComp) + { + shift = lastCnt = 0; + for (int ch = compBegin; ch < (compBegin + numComp); ch++) + { + data[ch] = 0; + sumData[ch] = 0; + } + } + void setAll(uint32_t* ui, ComponentID compBegin, uint32_t numComp) + { + for (int ch = compBegin; ch < (compBegin + numComp); ch++) + { + data[ch] = ui[ch]; + } + } + bool almostEqualData(SortingElement element, int errorLimit, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp) + { + bool almostEqual = true; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + uint32_t absError = 0; + if (isChroma((ComponentID) comp)) + { + absError += int(double(std::abs(data[comp] - element.data[comp])) * PLT_CHROMA_WEIGHTING) >> (bitDepths.recon[CHANNEL_TYPE_CHROMA] - PLT_ENCBITDEPTH); + } + else + { + absError += (std::abs(data[comp] - element.data[comp]))>> (bitDepths.recon[CHANNEL_TYPE_LUMA] - PLT_ENCBITDEPTH); + } + if (absError > errorLimit) + { + almostEqual = false; + break; + } + } + return almostEqual; + } + uint32_t getSAD(SortingElement element, const BitDepths& bitDepths, ComponentID 
compBegin, uint32_t numComp) + { + uint32_t sumAd = 0; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + ChannelType chType = (comp > 0) ? CHANNEL_TYPE_CHROMA : CHANNEL_TYPE_LUMA; + sumAd += (std::abs(data[comp] - element.data[comp]) >> (bitDepths.recon[chType] - PLT_ENCBITDEPTH)); + } + return sumAd; + } + void copyDataFrom(SortingElement element, ComponentID compBegin, uint32_t numComp) + { + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + data[comp] = element.data[comp]; + sumData[comp] = data[comp]; + } + shift = 0; lastCnt = 1; + } + void copyAllFrom(SortingElement element, ComponentID compBegin, uint32_t numComp) + { + copyDataFrom(element, compBegin, numComp); + cnt = element.cnt; + for (int comp = compBegin; comp < (compBegin + numComp); comp++) + { + sumData[comp] = element.sumData[comp]; + } + lastCnt = element.lastCnt; shift = element.shift; + } + void addElement(const SortingElement& element, ComponentID compBegin, uint32_t numComp) + { + cnt++; + for (int i = compBegin; i<(compBegin + numComp); i++) + { + sumData[i] += element.data[i]; + } + if (cnt>1 && cnt == 2 * lastCnt) + { + uint32_t rnd = 1 << shift; + shift++; + for (int i = compBegin; i<(compBegin + numComp); i++) + { + data[i] = (sumData[i] + rnd) >> shift; + } + lastCnt = cnt; + } + } +private: + uint32_t cnt; + int shift, lastCnt, data[3], sumData[3]; +}; /// encoder search class class IntraSearch : public IntraPrediction, CrossComponentPrediction { @@ -76,15 +186,171 @@ private: CodingStructure **m_pSaveCS; + bool m_saveCuCostInSCIPU; + uint8_t m_numCuInSCIPU; + Area m_cuAreaInSCIPU[NUM_INTER_CU_INFO_SAVE]; + double m_cuCostInSCIPU[NUM_INTER_CU_INFO_SAVE]; + + struct ModeInfo + { + bool mipFlg; // CU::mipFlag + bool mipTrFlg; // PU::mipTransposedFlag + int mRefId; // PU::multiRefIdx + uint8_t ispMod; // CU::ispMode + uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA] + + ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), 
ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0) {} + ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode) {} + bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId); } + }; + struct ModeInfoWithCost : public ModeInfo + { + double rdCost; + ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {} + ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode), rdCost(cost) {} + bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && rdCost == cmp.rdCost); } + static bool compareModeInfoWithCost(ModeInfoWithCost a, ModeInfoWithCost b) { return a.rdCost < b.rdCost; } + }; + + struct ISPTestedModeInfo + { + int numCompSubParts; + double rdCost; + + ISPTestedModeInfo() {} + + void setMode(int numParts, double cost) + { + numCompSubParts = numParts; + rdCost = cost; + } + void clear() + { + numCompSubParts = -1; + rdCost = MAX_DOUBLE; + } + }; + struct ISPTestedModesInfo + { + ISPTestedModeInfo intraMode[NUM_LUMA_MODE][2]; + bool modeHasBeenTested[NUM_LUMA_MODE][2]; + int numTotalParts[2]; + static_vector<int, FAST_UDI_MAX_RDMODE_NUM> testedModes[2]; + int bestModeSoFar; + ISPType bestSplitSoFar; + int bestMode[2]; + double bestCost[2]; + int numTestedModes[2]; + int candIndexInList[2]; + bool splitIsFinished[2]; + int numOrigModesToTest; + + // set a tested mode results + void setModeResults(ISPType splitType, int iModeIdx, int numCompletedParts, double rdCost, double currentBestCost) + { + const unsigned st = splitType - 1; + CHECKD(st > 1, "The split type is invalid!"); + const int maxNumParts = 
numTotalParts[st]; + intraMode[iModeIdx][st].setMode(numCompletedParts, numCompletedParts == maxNumParts ? rdCost : MAX_DOUBLE); + testedModes[st].push_back(iModeIdx); + numTestedModes[st]++; + modeHasBeenTested[iModeIdx][st] = true; + if (numCompletedParts == maxNumParts && rdCost < bestCost[st]) // best mode update + { + bestMode[st] = iModeIdx; + bestCost[st] = rdCost; + } + if (numCompletedParts == maxNumParts && rdCost < currentBestCost) // best mode update + { + bestModeSoFar = iModeIdx; + bestSplitSoFar = splitType; + } + } + + int getNumCompletedSubParts(ISPType splitType, int iModeIdx) + { + const unsigned st = splitType - 1; + CHECK(st < 0 || st > 1, "The split type is invalid!"); + CHECK(iModeIdx < 0 || iModeIdx >(NUM_LUMA_MODE - 1), "The modeIdx is invalid"); + return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].numCompSubParts : -1; + } + + double getRDCost(ISPType splitType, int iModeIdx) + { + const unsigned st = splitType - 1; + CHECKD(st > 1, "The split type is invalid!"); + return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].rdCost : MAX_DOUBLE; + } + + // get a tested intra mode index + int getTestedIntraMode(ISPType splitType, int pos) + { + const unsigned st = splitType - 1; + CHECKD(st > 1, "The split type is invalid!"); + return pos < testedModes[st].size() ? 
testedModes[st].at(pos) : -1; + } + + // set everything to default values + void clear() + { + for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++) + { + numTestedModes [splitIdx] = 0; + candIndexInList[splitIdx] = 0; + numTotalParts [splitIdx] = 0; + splitIsFinished[splitIdx] = false; + testedModes [splitIdx].clear(); + bestCost [splitIdx] = MAX_DOUBLE; + bestMode [splitIdx] = -1; + } + bestModeSoFar = -1; + bestSplitSoFar = NOT_INTRA_SUBPARTITIONS; + numOrigModesToTest = -1; + memset(modeHasBeenTested, 0, sizeof(modeHasBeenTested)); + } + void clearISPModeInfo(int idx) + { + intraMode[idx][0].clear(); + intraMode[idx][1].clear(); + } + void init(const int numTotalPartsHor, const int numTotalPartsVer) + { + clear(); + const int horSplit = HOR_INTRA_SUBPARTITIONS - 1, verSplit = VER_INTRA_SUBPARTITIONS - 1; + numTotalParts [horSplit] = numTotalPartsHor; + numTotalParts [verSplit] = numTotalPartsVer; + splitIsFinished[horSplit] = (numTotalParts[horSplit] == 0); + splitIsFinished[verSplit] = (numTotalParts[verSplit] == 0); + } + }; + + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_ispCandListHor, m_ispCandListVer; + static_vector<ModeInfoWithCost, FAST_UDI_MAX_RDMODE_NUM> m_regIntraRDListWithCosts; + + ISPTestedModesInfo m_ispTestedModes[NUM_LFNST_NUM_PER_SET]; + int m_curIspLfnstIdx; + //cost variables for the EMT algorithm and new modes list - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrl; - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlHor; - static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlVer; + double m_bestModeCostStore[ NUM_LFNST_NUM_PER_SET ]; // RD cost of the best mode for each PU using DCT2 + bool m_bestModeCostValid[ NUM_LFNST_NUM_PER_SET ]; + double m_modeCostStore[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; // RD cost of each mode for each PU using DCT2 + ModeInfo m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ]; + int32_t 
m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ]; + + ModeInfo m_savedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; + char m_savedBDPCMModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; + double m_savedRdCostFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM]; + int m_numSavedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2]; + int m_savedRdModeIdx; + + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedRdModeListLFNST; + static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedHadModeListLFNST; + uint32_t m_uiSavedNumRdModesLFNST; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST; + static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST; - static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeDiagRatio; - static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeHorVerRatio; - static_vector<int, FAST_UDI_MAX_RDMODE_NUM> m_intraModeTestedNormalIntra; PelStorage m_tmpStorageLCU; + PelStorage m_colorTransResiBuf; protected: // interface to option EncCfg* m_pcEncCfg; @@ -99,7 +365,18 @@ protected: CtxCache* m_CtxCache; bool m_isInitialized; - + uint32_t m_symbolSize; + uint16_t** m_truncBinBits; + uint16_t* m_escapeNumBins; + bool m_bestEscape; + double* m_indexError[MAXPLTSIZE + 1]; + uint8_t* m_minErrorIndexMap; // store the best index in terms of distortion for each pixel + uint8_t m_indexMapRDOQ [2][NUM_TRELLIS_STATE][2 * MAX_CU_BLKSIZE_PLT]; + bool m_runMapRDOQ [2][NUM_TRELLIS_STATE][2 * MAX_CU_BLKSIZE_PLT]; + uint8_t* m_statePtRDOQ [NUM_TRELLIS_STATE]; + bool m_prevRunTypeRDOQ[2][NUM_TRELLIS_STATE]; + int m_prevRunPosRDOQ [2][NUM_TRELLIS_STATE]; + double m_stateCostRDOQ [2][NUM_TRELLIS_STATE]; public: IntraSearch(); @@ -114,6 +391,7 @@ public: const uint32_t maxCUHeight, const uint32_t maxTotalCUDepth , EncReshape* m_pcReshape + , const unsigned bitDepthY ); void destroy (); @@ -124,12 +402,23 @@ public: void setModeCtrl ( EncModeCtrl 
*modeCtrl ) { m_modeCtrl = modeCtrl; } -public: + bool getSaveCuCostInSCIPU () { return m_saveCuCostInSCIPU; } + void setSaveCuCostInSCIPU ( bool b ) { m_saveCuCostInSCIPU = b; } + void setNumCuInSCIPU ( uint8_t i ) { m_numCuInSCIPU = i; } + void saveCuAreaCostInSCIPU ( Area area, double cost ); + void initCuAreaCostInSCIPU (); + double findInterCUCost ( CodingUnit &cu ); - void estIntraPredLumaQT ( CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE ); +public: + bool estIntraPredLumaQT(CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false, CodingStructure* bestCS = NULL); void estIntraPredChromaQT ( CodingUnit &cu, Partitioner& pm, const double maxCostAllowed = MAX_DOUBLE ); - void IPCMSearch (CodingStructure &cs, Partitioner& partitioner); + void PLTSearch ( CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp); uint64_t xFracModeBitsIntra (PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &compID); + void invalidateBestModeCost () { for( int i = 0; i < NUM_LFNST_NUM_PER_SET; i++ ) m_bestModeCostValid[ i ] = false; }; + + void sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum); + void invalidateBestRdModeFirstColorSpace(); + void setSavedRdModeIdx(int idx) { m_savedRdModeIdx = idx; } protected: @@ -137,7 +426,6 @@ protected: // T & Q & Q-1 & T-1 // ------------------------------------------------------------------------------------------------------------------- - void xEncPCM (CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID); // ------------------------------------------------------------------------------------------------------------------- // Intra search @@ -145,21 +433,38 @@ protected: void xEncIntraHeader ( CodingStructure &cs, 
Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1 ); void xEncSubdivCbfQT ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP ); - uint64_t xGetIntraFracBitsQT ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP ); + uint64_t xGetIntraFracBitsQT ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, CUCtx * cuCtx = nullptr ); uint64_t xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner& pm, const ComponentID compID ); uint64_t xGetIntraFracBitsQTChroma(TransformUnit& tu, const ComponentID &compID); - void xEncCoeffQT ( CodingStructure &cs, Partitioner& pm, const ComponentID compID, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP ); - + void xEncCoeffQT ( CodingStructure &cs, Partitioner& pm, const ComponentID compID, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, CUCtx * cuCtx = nullptr ); void xIntraCodingTUBlock (TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false ); + void xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes = nullptr, const bool loadTr = false); ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const PartSplit ispType = TU_NO_ISP ); - void xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinnder = false ); - - - void encPredIntraDPCM( const ComponentID &compID, PelBuf 
&pOrg, PelBuf &pDst, const uint32_t &uiDirMode ); - static bool useDPCMForFirstPassIntraEstimation( const PredictionUnit &pu, const uint32_t &uiDirMode ); + bool xRecurIntraCodingLumaQT ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false ); + bool xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false); + bool xIntraCodingLumaISP ( CodingStructure& cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE ); + + template<typename T, size_t N> + void reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip); + void derivePLTLossy ( CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp); + void calcPixelPred ( CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp); + void preCalcPLTIndexRD (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp); + void calcPixelPredRD (CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* pixelValue, Pel* recoValue, ComponentID compBegin, uint32_t numComp); + void deriveIndexMap (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dCost); + bool deriveSubblockIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, PLTScanMode pltScanMode, int minSubPos, int maxSubPos, const BinFracBits& fracBitsPltRunType, const BinFracBits* fracBitsPltIndexINDEX, const BinFracBits* fracBitsPltIndexCOPY, 
const double minCost, bool useRotate); + double rateDistOptPLT (bool RunType, uint8_t RunIndex, bool prevRunType, uint8_t prevRunIndex, uint8_t aboveRunIndex, bool& prevCodedRunType, int& prevCodedRunPos, int scanPos, uint32_t width, int dist, int indexMaxValue, const BinFracBits* IndexfracBits, const BinFracBits& TypefracBits); + void initTBCTable (int bitDepth); + uint32_t getTruncBinBits (uint32_t symbol, uint32_t maxSymbol); + uint32_t getEpExGolombNumBins (uint32_t symbol, uint32_t count); + void xGetNextISPMode ( ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize ); + bool xSortISPCandList ( double bestCostSoFar, double bestNonISPCost, ModeInfo bestNonISPMode ); + void xSortISPCandListLFNST ( ); + void xFindAlreadyTestedNearbyIntraModes ( int currentLfnstIdx, int currentIntraMode, int* refLfnstIdx, int* leftIntraMode, int* rightIntraMode, ISPType ispOption, int windowSize ); + bool updateISPStatusFromRelCU ( double bestNonISPCostCurrCu, ModeInfo bestNonISPModeCurrCu, int& bestISPModeInRelCU ); + void xFinishISPModes ( ); };// END CLASS DEFINITION EncSearch //! \} diff --git a/source/Lib/EncoderLib/NALwrite.cpp b/source/Lib/EncoderLib/NALwrite.cpp index 93b09b4d90535882605a4c272a0246dc76114690..4764029a595e9daa55484faec86163de40b055cd 100644 --- a/source/Lib/EncoderLib/NALwrite.cpp +++ b/source/Lib/EncoderLib/NALwrite.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -49,11 +49,14 @@ static const uint8_t emulation_prevention_three_byte = 3; void writeNalUnitHeader(ostream& out, OutputNALUnit& nalu) // nal_unit_header() { OutputBitstream bsNALUHeader; - - bsNALUHeader.write(0,1); // forbidden_zero_bit - bsNALUHeader.write(nalu.m_nalUnitType, 6); // nal_unit_type - bsNALUHeader.write(nalu.m_nuhLayerId, 6); // nuh_layer_id - bsNALUHeader.write(nalu.m_temporalId+1, 3); // nuh_temporal_id_plus1 + int forbiddenZero = 0; + bsNALUHeader.write(forbiddenZero, 1); // forbidden_zero_bit + int nuhReservedZeroBit = 0; + bsNALUHeader.write(nuhReservedZeroBit, 1); // nuh_reserved_zero_bit + CHECK(nalu.m_nuhLayerId > 55, "The value of nuh_layer_id shall be in the range of 0 to 55, inclusive"); + bsNALUHeader.write(nalu.m_nuhLayerId, 6); // nuh_layer_id + bsNALUHeader.write(nalu.m_nalUnitType, 5); // nal_unit_type + bsNALUHeader.write(nalu.m_temporalId + 1, 3); // nuh_temporal_id_plus1 out.write(reinterpret_cast<const char*>(bsNALUHeader.getByteStream()), bsNALUHeader.getByteStreamLength()); } diff --git a/source/Lib/EncoderLib/NALwrite.h b/source/Lib/EncoderLib/NALwrite.h index b107a2f4c3425e7d3d123c25372e7946bbc1140a..34a787d4064aca797b52f4be8289fdaa199a9ab3 100644 --- a/source/Lib/EncoderLib/NALwrite.h +++ b/source/Lib/EncoderLib/NALwrite.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -58,9 +58,10 @@ struct OutputNALUnit : public NALUnit */ OutputNALUnit( NalUnitType nalUnitType, + uint32_t layerId = 0, uint32_t temporalID = 0, uint32_t reserved_zero_6bits = 0) - : NALUnit(nalUnitType, temporalID, reserved_zero_6bits) + : NALUnit( nalUnitType, temporalID, reserved_zero_6bits, 0, layerId ) , m_Bitstream() {} diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp index da6c26d849c4f665402d2456c7ab9522e1d39fa5..e53ce655f3cc8206950f0ce5867649614d408d9d 100644 --- a/source/Lib/EncoderLib/RateCtrl.cpp +++ b/source/Lib/EncoderLib/RateCtrl.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -306,6 +306,8 @@ EncRCGOP::EncRCGOP() m_targetBits = 0; m_picLeft = 0; m_bitsLeft = 0; + m_minEstLambda = 0.0; + m_maxEstLambda = 0.0; } EncRCGOP::~EncRCGOP() @@ -476,6 +478,11 @@ void EncRCGOP::create( EncRCSeq* encRCSeq, int numPic ) m_targetBits = targetBits; m_picLeft = m_numPic; m_bitsLeft = m_targetBits; + int bitdepth_luma_scale = + 2 * (encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(encRCSeq->getbitDepth())); + m_minEstLambda = 0.1; + m_maxEstLambda = 10000.0 * pow(2.0, bitdepth_luma_scale); } void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* equaCoeffA, double* equaCoeffB, int GOPSize ) @@ -493,8 +500,8 @@ void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* e double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize) { double solution = 100.0; - double minNumber = 0.1; - double maxNumber = 10000.0; + double minNumber = m_minEstLambda; + double maxNumber = m_maxEstLambda; for ( 
int i=0; i<g_RCIterationNum; i++ ) { double fx = 0.0; @@ -522,7 +529,7 @@ double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCo } } - solution = Clip3( 0.1, 10000.0, solution ); + solution = Clip3(m_minEstLambda, m_maxEstLambda, solution); return solution; } @@ -776,6 +783,10 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, bool double beta = m_encRCSeq->getPicPara( m_frameLevel ).m_beta; double bpp = (double)m_targetBits/(double)m_numberOfPixel; + int bitdepth_luma_scale = + 2 * (m_encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); + int lastPicValPix = 0; if (listPreviousPictures.size() > 0) { @@ -816,28 +827,28 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, bool if ( lastLevelLambda > 0.0 ) { - lastLevelLambda = Clip3( 0.1, 10000.0, lastLevelLambda ); + lastLevelLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), lastLevelLambda); estLambda = Clip3( lastLevelLambda * pow( 2.0, -3.0/3.0 ), lastLevelLambda * pow( 2.0, 3.0/3.0 ), estLambda ); } if ( lastPicLambda > 0.0 ) { - lastPicLambda = Clip3( 0.1, 2000.0, lastPicLambda ); + lastPicLambda = Clip3(m_encRCGOP->getMinEstLambda(), 2000.0 * pow(2.0, bitdepth_luma_scale), lastPicLambda); estLambda = Clip3( lastPicLambda * pow( 2.0, -10.0/3.0 ), lastPicLambda * pow( 2.0, 10.0/3.0 ), estLambda ); } else if ( lastValidLambda > 0.0 ) { - lastValidLambda = Clip3( 0.1, 2000.0, lastValidLambda ); + lastValidLambda = Clip3(m_encRCGOP->getMinEstLambda(), 2000.0 * pow(2.0, bitdepth_luma_scale), lastValidLambda); estLambda = Clip3( lastValidLambda * pow(2.0, -10.0/3.0), lastValidLambda * pow(2.0, 10.0/3.0), estLambda ); } else { - estLambda = Clip3( 0.1, 10000.0, estLambda ); + estLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), estLambda); } - if ( estLambda < 0.1 ) + if ( estLambda < m_encRCGOP->getMinEstLambda()) { - estLambda = 0.1; + estLambda 
= m_encRCGOP->getMinEstLambda(); } //Avoid different results in different platforms. The problem is caused by the different results of pow() in different platforms. @@ -1007,7 +1018,11 @@ double EncRCPic::getLCUEstLambda( double bpp ) } else { - estLambda = Clip3( 10.0, 1000.0, estLambda ); + int bitdepth_luma_scale = + 2 + * (m_encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); + estLambda = Clip3(10.0 * pow(2.0, bitdepth_luma_scale), 1000.0 * pow(2.0, bitdepth_luma_scale), estLambda); } if ( estLambda < 0.1 ) @@ -1086,8 +1101,8 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl alpha *= ( 1.0 - m_encRCSeq->getAlphaUpdate() / 2.0 ); beta *= ( 1.0 - m_encRCSeq->getBetaUpdate() / 2.0 ); - alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha ); - beta = Clip3( g_RCBetaMinValue, g_RCBetaMaxValue, beta ); + alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha ); + beta = clipRcBeta( beta ); TRCParameter rcPara; rcPara.m_alpha = alpha; @@ -1103,21 +1118,16 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl } double MSE = m_LCUs[LCUIdx].m_actualMSE; - double updatedK = bpp * inputLambda / MSE; + double updatedK = MSE > 0 ? 
bpp * inputLambda / MSE : 0.0; double updatedC = MSE / pow(bpp, -updatedK); rcPara.m_alpha = updatedC * updatedK; rcPara.m_beta = -updatedK - 1.0; - - if (bpp > 0 && updatedK > 0.0001) + if (MSE > 0) { + rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha ); + rcPara.m_beta = clipRcBeta( rcPara.m_beta ); m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); } - else - { - rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha); - m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); - } - return; } @@ -1127,8 +1137,8 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl lnbpp = Clip3( -5.0, -0.1, lnbpp ); beta += m_encRCSeq->getBetaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * lnbpp; - alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha ); - beta = Clip3( g_RCBetaMinValue, g_RCBetaMaxValue, beta ); + alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha ); + beta = clipRcBeta( beta ); TRCParameter rcPara; rcPara.m_alpha = alpha; @@ -1144,21 +1154,17 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl } double MSE = m_LCUs[LCUIdx].m_actualMSE; - double updatedK = bpp * inputLambda / MSE; + double updatedK = MSE > 0 ? 
bpp * inputLambda / MSE : 0.0; double updatedC = MSE / pow(bpp, -updatedK); rcPara.m_alpha = updatedC * updatedK; rcPara.m_beta = -updatedK - 1.0; - if (bpp > 0 && updatedK > 0.0001) + if (MSE > 0) { + rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha ); + rcPara.m_beta = clipRcBeta( rcPara.m_beta ); m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); } - else - { - rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha); - m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); - } - } double EncRCPic::calAverageQP() @@ -1243,7 +1249,6 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do m_picQP = g_RCInvalidQPValue; } m_picLambda = averageLambda; - double alpha = m_encRCSeq->getPicPara( m_frameLevel ).m_alpha; double beta = m_encRCSeq->getPicPara( m_frameLevel ).m_beta; double skipRatio = 0; @@ -1262,7 +1267,7 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do { // update parameters double picActualBits = ( double )m_picActualBits; - double picActualBpp = picActualBits / (double)m_validPixelsInPic; + double picActualBpp = m_validPixelsInPic > 0 ? 
picActualBits / (double)m_validPixelsInPic : 0.001; double calLambda = alpha * pow( picActualBpp, beta ); double inputLambda = m_picLambda; @@ -1271,8 +1276,8 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do alpha *= ( 1.0 - m_encRCSeq->getAlphaUpdate() / 2.0 ); beta *= ( 1.0 - m_encRCSeq->getBetaUpdate() / 2.0 ); - alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha ); - beta = Clip3( g_RCBetaMinValue, g_RCBetaMaxValue, beta ); + alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha ); + beta = clipRcBeta( beta ); TRCParameter rcPara; rcPara.m_alpha = alpha; @@ -1292,6 +1297,8 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do if (m_validPixelsInPic > 0) { + rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha ); + rcPara.m_beta = clipRcBeta( rcPara.m_beta ); m_encRCSeq->setPicPara(m_frameLevel, rcPara); } @@ -1302,19 +1309,17 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do alpha += m_encRCSeq->getAlphaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * alpha; double lnbpp = log( picActualBpp ); lnbpp = Clip3( -5.0, -0.1, lnbpp ); - beta += m_encRCSeq->getBetaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * lnbpp; - alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha ); - beta = Clip3( g_RCBetaMinValue, g_RCBetaMaxValue, beta ); + alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha ); + beta = clipRcBeta( beta ); } TRCParameter rcPara; rcPara.m_alpha = alpha; rcPara.m_beta = beta; rcPara.m_skipRatio = skipRatio; - double picActualBpp = (double)m_picActualBits / (double)m_validPixelsInPic; - + double picActualBpp = m_validPixelsInPic > 0 ? 
m_picActualBits / (double)m_validPixelsInPic : 0.001; double avgMSE = getPicMSE(); double updatedK = picActualBpp * averageLambda / avgMSE; double updatedC = avgMSE / pow(picActualBpp, -updatedK); @@ -1328,17 +1333,33 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do if (m_validPixelsInPic > 0) { + rcPara.m_alpha = clipRcAlpha(m_encRCSeq->getbitDepth(), rcPara.m_alpha); + rcPara.m_beta = clipRcBeta( rcPara.m_beta ); m_encRCSeq->setPicPara(m_frameLevel, rcPara); } if ( m_frameLevel == 1 ) { - double currLambda = Clip3( 0.1, 10000.0, m_picLambda ); + double currLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), m_picLambda); double updateLastLambda = g_RCWeightHistoryLambda * m_encRCSeq->getLastLambda() + g_RCWeightCurrentLambda * currLambda; m_encRCSeq->setLastLambda( updateLastLambda ); } } +double EncRCPic::clipRcAlpha(const int bitdepth, const double alpha) +{ + int bitdepth_luma_scale = + 2 + * (bitdepth - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); + return Clip3(g_RCAlphaMinValue, g_RCAlphaMaxValue * pow(2.0, bitdepth_luma_scale), alpha); +} + +double EncRCPic::clipRcBeta(const double beta) +{ + return Clip3(g_RCBetaMinValue, g_RCBetaMaxValue, beta); +} + int EncRCPic::getRefineBitsForIntra( int orgBits ) { double alpha=0.25, beta=0.5582; @@ -1822,7 +1843,7 @@ int RateCtrl::updateCpbState(int actualBits) return cpbState; } -void RateCtrl::initHrdParam(const HRD* pcHrd, int iFrameRate, double fInitialCpbFullness) +void RateCtrl::initHrdParam(const HRDParameters* pcHrd, int iFrameRate, double fInitialCpbFullness) { m_CpbSaturationEnabled = true; m_cpbSize = (pcHrd->getCpbSizeValueMinus1(0, 0, 0) + 1) << (4 + pcHrd->getCpbSizeScale()); diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h index 09e84409f6444d836277cba90bbdcc239c695853..afb298a2ebdd9559a9a0c50749090d977555d4e9 100644 --- a/source/Lib/EncoderLib/RateCtrl.h +++ 
b/source/Lib/EncoderLib/RateCtrl.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -209,6 +209,8 @@ public: int getPicLeft() { return m_picLeft; } int getBitsLeft() { return m_bitsLeft; } int getTargetBitInGOP( int i ) { return m_picTargetBitInGOP[i]; } + double getMinEstLambda() { return m_minEstLambda; } + double getMaxEstLambda() { return m_maxEstLambda; } private: EncRCSeq* m_encRCSeq; @@ -217,6 +219,8 @@ private: int m_targetBits; int m_picLeft; int m_bitsLeft; + double m_minEstLambda; + double m_maxEstLambda; }; class EncRCPic @@ -243,6 +247,9 @@ public: void updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, double skipRatio, bool updateLCUParameter = true); void updateAfterPicture( int actualHeaderBits, int actualTotalBits, double averageQP, double averageLambda, bool isIRAP); + double clipRcAlpha(const int bitdepth, const double alpha); + double clipRcBeta(const double beta); + void addToPictureLsit( list<EncRCPic*>& listPreviousPictures ); double calAverageQP(); double calAverageLambda(); @@ -347,7 +354,7 @@ public: uint32_t getCpbSize() { return m_cpbSize; } uint32_t getBufferingRate() { return m_bufferingRate; } int updateCpbState(int actualBits); - void initHrdParam(const HRD* pcHrd, int iFrameRate, double fInitialCpbFullness); + void initHrdParam(const HRDParameters* pcHrd, int iFrameRate, double fInitialCpbFullness); #endif private: diff --git a/source/Lib/EncoderLib/SEIEncoder.cpp b/source/Lib/EncoderLib/SEIEncoder.cpp index 748036209ad0e43a8a787e61b85fd30de2ee90cf..7e94dcd31fbca6d6ab8f90270b3d6185dc5873e6 100644 --- a/source/Lib/EncoderLib/SEIEncoder.cpp +++ b/source/Lib/EncoderLib/SEIEncoder.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * 
granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,29 +44,6 @@ std::string hashToString(const PictureHash &digest, int numChar); //! \ingroup EncoderLib //! \{ -#if HEVC_VPS -void SEIEncoder::initSEIActiveParameterSets (SEIActiveParameterSets *seiActiveParameterSets, const VPS *vps, const SPS *sps) -#else -void SEIEncoder::initSEIActiveParameterSets (SEIActiveParameterSets *seiActiveParameterSets, const SPS *sps) -#endif -{ - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiActiveParameterSets!=NULL), "Unspecified error"); -#if HEVC_VPS - CHECK(!(vps!=NULL), "Unspecified error"); -#endif - CHECK(!(sps!=NULL), "Unspecified error"); - -#if HEVC_VPS - seiActiveParameterSets->activeVPSId = vps->getVPSId(); -#endif - seiActiveParameterSets->m_selfContainedCvsFlag = false; - seiActiveParameterSets->m_noParameterSetUpdateFlag = false; - seiActiveParameterSets->numSpsIdsMinus1 = 0; - seiActiveParameterSets->activeSeqParameterSetId.resize(seiActiveParameterSets->numSpsIdsMinus1 + 1); - seiActiveParameterSets->activeSeqParameterSetId[0] = sps->getSPSId(); -} - void SEIEncoder::initSEIFramePacking(SEIFramePacking *seiFramePacking, int currPicNum) { CHECK(!(m_isInitialized), "Unspecified error"); @@ -93,178 +70,308 @@ void SEIEncoder::initSEIFramePacking(SEIFramePacking *seiFramePacking, int currP seiFramePacking->m_upsampledAspectRatio = 0; } -void SEIEncoder::initSEISegmentedRectFramePacking(SEISegmentedRectFramePacking *seiSegmentedRectFramePacking) +void SEIEncoder::initSEIBufferingPeriod(SEIBufferingPeriod *bufferingPeriodSEI, bool noLeadingPictures) { - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiSegmentedRectFramePacking!=NULL), "Unspecified error"); + CHECK(!(m_isInitialized), "bufferingPeriodSEI already initialized"); + CHECK(!(bufferingPeriodSEI != nullptr), "Need a 
bufferingPeriodSEI for initialization (got nullptr)"); - seiSegmentedRectFramePacking->m_arrangementCancelFlag = m_pcCfg->getSegmentedRectFramePackingArrangementSEICancel(); - seiSegmentedRectFramePacking->m_contentInterpretationType = m_pcCfg->getSegmentedRectFramePackingArrangementSEIType(); - seiSegmentedRectFramePacking->m_arrangementPersistenceFlag = m_pcCfg->getSegmentedRectFramePackingArrangementSEIPersistence(); + uint32_t uiInitialCpbRemovalDelay = (90000/2); // 0.5 sec + bufferingPeriodSEI->m_bpNalCpbParamsPresentFlag = true; + bufferingPeriodSEI->m_bpVclCpbParamsPresentFlag = true; + bufferingPeriodSEI->m_bpMaxSubLayers = m_pcCfg->getMaxTempLayer() ; + bufferingPeriodSEI->m_bpCpbCnt = 1; + for(int i=0; i < bufferingPeriodSEI->m_bpMaxSubLayers; i++) + { + for(int j=0; j < bufferingPeriodSEI->m_bpCpbCnt; j++) + { + bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][0] = uiInitialCpbRemovalDelay; + bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][1] = uiInitialCpbRemovalDelay; + bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][0] = uiInitialCpbRemovalDelay; + bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][1] = uiInitialCpbRemovalDelay; + } + } + // We don't set concatenation_flag here. max_initial_removal_delay_for_concatenation depends on the usage scenario. + // The parameters could be added to config file, but as long as the initialisation of generic buffering parameters is + // not controllable, it does not seem to make sense to provide settings for these. 
+ bufferingPeriodSEI->m_concatenationFlag = false; + bufferingPeriodSEI->m_maxInitialRemovalDelayForConcatenation = uiInitialCpbRemovalDelay; + + bufferingPeriodSEI->m_bpDecodingUnitHrdParamsPresentFlag = m_pcCfg->getNoPicPartitionFlag() == false; + bufferingPeriodSEI->m_decodingUnitCpbParamsInPicTimingSeiFlag = !m_pcCfg->getDecodingUnitInfoSEIEnabled(); + + bufferingPeriodSEI->m_initialCpbRemovalDelayLength = 16; // assuming 0.5 sec, log2( 90,000 * 0.5 ) = 16-bit + // Note: The following parameters require some knowledge about the GOP structure. + // Using getIntraPeriod() should be avoided though, because it assumes certain GOP + // properties, which are only valid in CTC. + // Still copying this setting from HM for consistency, improvements welcome + bool isRandomAccess = m_pcCfg->getIntraPeriod() > 0; + if( isRandomAccess ) + { + bufferingPeriodSEI->m_cpbRemovalDelayLength = 6; // 32 = 2^5 (plus 1) + bufferingPeriodSEI->m_dpbOutputDelayLength = 6; // 32 + 3 = 2^6 + } + else + { + bufferingPeriodSEI->m_cpbRemovalDelayLength = 9; // max. 2^10 + bufferingPeriodSEI->m_dpbOutputDelayLength = 9; // max. 2^10 + } + bufferingPeriodSEI->m_duCpbRemovalDelayIncrementLength = 7; // ceil( log2( tick_divisor_minus2 + 2 ) ) + bufferingPeriodSEI->m_dpbOutputDelayDuLength = bufferingPeriodSEI->m_dpbOutputDelayLength + bufferingPeriodSEI->m_duCpbRemovalDelayIncrementLength; + //for the concatenation, it can be set to one during splicing. 
+ bufferingPeriodSEI->m_concatenationFlag = 0; + //since the temporal layer HRDParameters is not ready, we assumed it is fixed + bufferingPeriodSEI->m_auCpbRemovalDelayDelta = 1; + bufferingPeriodSEI->m_cpbRemovalDelayDeltasPresentFlag = m_pcCfg->getBpDeltasGOPStructure() ; + if (bufferingPeriodSEI->m_cpbRemovalDelayDeltasPresentFlag) + { + switch (m_pcCfg->getGOPSize()) + { + case 8: + { + if (noLeadingPictures) + { + bufferingPeriodSEI->m_numCpbRemovalDelayDeltas = 5; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[0] = 1; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[1] = 2; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[2] = 3; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[3] = 6; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[4] = 7; + } + else + { + bufferingPeriodSEI->m_numCpbRemovalDelayDeltas = 3; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[0] = 1; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[1] = 2; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[2] = 3; + } + } + break; + case 16: + { + if (noLeadingPictures) + { + bufferingPeriodSEI->m_numCpbRemovalDelayDeltas = 9; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[0] = 1; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[1] = 2; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[2] = 3; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[3] = 4; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[4] = 6; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[5] = 7; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[6] = 9; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[7] = 14; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[8] = 15; + } + else + { + bufferingPeriodSEI->m_numCpbRemovalDelayDeltas = 5; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[0] = 1; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[1] = 2; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[2] = 3; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[3] = 6; + bufferingPeriodSEI->m_cpbRemovalDelayDelta[4] = 7; + } + } + break; + default: + { + THROW("m_cpbRemovalDelayDelta not applicable for the GOP size"); + } 
+ break; + } + } + // A commercial encoder should track the buffer state for all layers and sub-layers + // to ensure CPB conformance. Such tracking is required for calculating alternative + // CPB parameters. + // Unfortunately VTM does not have such tracking. Thus we cannot encode alternative + // CPB parameters here. + bufferingPeriodSEI->m_altCpbParamsPresentFlag = false; + bufferingPeriodSEI->m_useAltCpbParamsFlag = false; } -void SEIEncoder::initSEIDisplayOrientation(SEIDisplayOrientation* seiDisplayOrientation) +void SEIEncoder::initSEIErp(SEIEquirectangularProjection* seiEquirectangularProjection) { - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiDisplayOrientation!=NULL), "Unspecified error"); + CHECK(!(m_isInitialized), "seiEquirectangularProjection already initialized"); + CHECK(!(seiEquirectangularProjection != nullptr), "Need a seiEquirectangularProjection for initialization (got nullptr)"); - seiDisplayOrientation->cancelFlag = false; - seiDisplayOrientation->horFlip = false; - seiDisplayOrientation->verFlip = false; - seiDisplayOrientation->anticlockwiseRotation = m_pcCfg->getDisplayOrientationSEIAngle(); + seiEquirectangularProjection->m_erpCancelFlag = m_pcCfg->getErpSEICancelFlag(); + if (!seiEquirectangularProjection->m_erpCancelFlag) + { + seiEquirectangularProjection->m_erpPersistenceFlag = m_pcCfg->getErpSEIPersistenceFlag(); + seiEquirectangularProjection->m_erpGuardBandFlag = m_pcCfg->getErpSEIGuardBandFlag(); + if (seiEquirectangularProjection->m_erpGuardBandFlag == 1) + { + seiEquirectangularProjection->m_erpGuardBandType = m_pcCfg->getErpSEIGuardBandType(); + seiEquirectangularProjection->m_erpLeftGuardBandWidth = m_pcCfg->getErpSEILeftGuardBandWidth(); + seiEquirectangularProjection->m_erpRightGuardBandWidth = m_pcCfg->getErpSEIRightGuardBandWidth(); + } + } } -void SEIEncoder::initSEIToneMappingInfo(SEIToneMappingInfo *seiToneMappingInfo) +void SEIEncoder::initSEISphereRotation(SEISphereRotation* seiSphereRotation) { - 
CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiToneMappingInfo!=NULL), "Unspecified error"); + CHECK(!(m_isInitialized), "seiSphereRotation already initialized"); + CHECK(!(seiSphereRotation != nullptr), "Need a seiSphereRotation for initialization (got nullptr)"); - seiToneMappingInfo->m_toneMapId = m_pcCfg->getTMISEIToneMapId(); - seiToneMappingInfo->m_toneMapCancelFlag = m_pcCfg->getTMISEIToneMapCancelFlag(); - seiToneMappingInfo->m_toneMapPersistenceFlag = m_pcCfg->getTMISEIToneMapPersistenceFlag(); + seiSphereRotation->m_sphereRotationCancelFlag = m_pcCfg->getSphereRotationSEICancelFlag(); + if ( !seiSphereRotation->m_sphereRotationCancelFlag ) + { + seiSphereRotation->m_sphereRotationPersistenceFlag = m_pcCfg->getSphereRotationSEIPersistenceFlag(); + seiSphereRotation->m_sphereRotationYaw = m_pcCfg->getSphereRotationSEIYaw(); + seiSphereRotation->m_sphereRotationPitch = m_pcCfg->getSphereRotationSEIPitch(); + seiSphereRotation->m_sphereRotationRoll = m_pcCfg->getSphereRotationSEIRoll(); + } +} - seiToneMappingInfo->m_codedDataBitDepth = m_pcCfg->getTMISEICodedDataBitDepth(); - CHECK(!(seiToneMappingInfo->m_codedDataBitDepth >= 8 && seiToneMappingInfo->m_codedDataBitDepth <= 14), "Unspecified error"); - seiToneMappingInfo->m_targetBitDepth = m_pcCfg->getTMISEITargetBitDepth(); - CHECK(!(seiToneMappingInfo->m_targetBitDepth >= 1 && seiToneMappingInfo->m_targetBitDepth <= 17), "Unspecified error"); - seiToneMappingInfo->m_modelId = m_pcCfg->getTMISEIModelID(); - CHECK(!(seiToneMappingInfo->m_modelId >=0 &&seiToneMappingInfo->m_modelId<=4), "Unspecified error"); +void SEIEncoder::initSEIOmniViewport(SEIOmniViewport* seiOmniViewport) +{ + CHECK(!(m_isInitialized), "seiOmniViewport already initialized"); + CHECK(!(seiOmniViewport != nullptr), "Need a seiOmniViewport for initialization (got nullptr)"); - switch( seiToneMappingInfo->m_modelId) + seiOmniViewport->m_omniViewportId = m_pcCfg->getOmniViewportSEIId(); + 
seiOmniViewport->m_omniViewportCancelFlag = m_pcCfg->getOmniViewportSEICancelFlag(); + if ( !seiOmniViewport->m_omniViewportCancelFlag ) { - case 0: - { - seiToneMappingInfo->m_minValue = m_pcCfg->getTMISEIMinValue(); - seiToneMappingInfo->m_maxValue = m_pcCfg->getTMISEIMaxValue(); - break; - } - case 1: + seiOmniViewport->m_omniViewportPersistenceFlag = m_pcCfg->getOmniViewportSEIPersistenceFlag(); + seiOmniViewport->m_omniViewportCntMinus1 = m_pcCfg->getOmniViewportSEICntMinus1(); + + seiOmniViewport->m_omniViewportRegions.resize(seiOmniViewport->m_omniViewportCntMinus1+1); + for (uint32_t i = 0; i <= seiOmniViewport->m_omniViewportCntMinus1; i++) { - seiToneMappingInfo->m_sigmoidMidpoint = m_pcCfg->getTMISEISigmoidMidpoint(); - seiToneMappingInfo->m_sigmoidWidth = m_pcCfg->getTMISEISigmoidWidth(); - break; + SEIOmniViewport::OmniViewport &viewport = seiOmniViewport->m_omniViewportRegions[i]; + viewport.azimuthCentre = m_pcCfg->getOmniViewportSEIAzimuthCentre(i); + viewport.elevationCentre = m_pcCfg->getOmniViewportSEIElevationCentre(i); + viewport.tiltCentre = m_pcCfg->getOmniViewportSEITiltCentre(i); + viewport.horRange = m_pcCfg->getOmniViewportSEIHorRange(i); + viewport.verRange = m_pcCfg->getOmniViewportSEIVerRange(i); } - case 2: + } +} + +void SEIEncoder::initSEIRegionWisePacking(SEIRegionWisePacking *seiRegionWisePacking) +{ + CHECK(!(m_isInitialized), "seiRegionWisePacking already initialized"); + CHECK(!(seiRegionWisePacking != nullptr), "Need a seiRegionWisePacking for initialization (got nullptr)"); + + seiRegionWisePacking->m_rwpCancelFlag = m_pcCfg->getRwpSEIRwpCancelFlag(); + seiRegionWisePacking->m_rwpPersistenceFlag = m_pcCfg->getRwpSEIRwpPersistenceFlag(); + seiRegionWisePacking->m_constituentPictureMatchingFlag = m_pcCfg->getRwpSEIConstituentPictureMatchingFlag(); + seiRegionWisePacking->m_numPackedRegions = m_pcCfg->getRwpSEINumPackedRegions(); + seiRegionWisePacking->m_projPictureWidth = m_pcCfg->getRwpSEIProjPictureWidth(); + 
seiRegionWisePacking->m_projPictureHeight = m_pcCfg->getRwpSEIProjPictureHeight(); + seiRegionWisePacking->m_packedPictureWidth = m_pcCfg->getRwpSEIPackedPictureWidth(); + seiRegionWisePacking->m_packedPictureHeight = m_pcCfg->getRwpSEIPackedPictureHeight(); + seiRegionWisePacking->m_rwpTransformType.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpGuardBandFlag.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_projRegionWidth.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_projRegionHeight.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpProjRegionTop.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_projRegionLeft.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_packedRegionWidth.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_packedRegionHeight.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_packedRegionTop.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_packedRegionLeft.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpLeftGuardBandWidth.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpRightGuardBandWidth.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpTopGuardBandHeight.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpBottomGuardBandHeight.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpGuardBandNotUsedForPredFlag.resize(seiRegionWisePacking->m_numPackedRegions); + seiRegionWisePacking->m_rwpGuardBandType.resize(4*seiRegionWisePacking->m_numPackedRegions); + for( int i=0; i < seiRegionWisePacking->m_numPackedRegions; i++ ) + { + seiRegionWisePacking->m_rwpTransformType[i] = m_pcCfg->getRwpSEIRwpTransformType(i); + seiRegionWisePacking->m_rwpGuardBandFlag[i] = 
m_pcCfg->getRwpSEIRwpGuardBandFlag(i); + seiRegionWisePacking->m_projRegionWidth[i] = m_pcCfg->getRwpSEIProjRegionWidth(i); + seiRegionWisePacking->m_projRegionHeight[i] = m_pcCfg->getRwpSEIProjRegionHeight(i); + seiRegionWisePacking->m_rwpProjRegionTop[i] = m_pcCfg->getRwpSEIRwpSEIProjRegionTop(i); + seiRegionWisePacking->m_projRegionLeft[i] = m_pcCfg->getRwpSEIProjRegionLeft(i); + seiRegionWisePacking->m_packedRegionWidth[i] = m_pcCfg->getRwpSEIPackedRegionWidth(i); + seiRegionWisePacking->m_packedRegionHeight[i] = m_pcCfg->getRwpSEIPackedRegionHeight(i); + seiRegionWisePacking->m_packedRegionTop[i] = m_pcCfg->getRwpSEIPackedRegionTop(i); + seiRegionWisePacking->m_packedRegionLeft[i] = m_pcCfg->getRwpSEIPackedRegionLeft(i); + if( seiRegionWisePacking->m_rwpGuardBandFlag[i] ) { - uint32_t num = 1u<<(seiToneMappingInfo->m_targetBitDepth); - seiToneMappingInfo->m_startOfCodedInterval.resize(num); - int* ptmp = m_pcCfg->getTMISEIStartOfCodedInterva(); - if(ptmp) + seiRegionWisePacking->m_rwpLeftGuardBandWidth[i] = m_pcCfg->getRwpSEIRwpLeftGuardBandWidth(i); + seiRegionWisePacking->m_rwpRightGuardBandWidth[i] = m_pcCfg->getRwpSEIRwpRightGuardBandWidth(i); + seiRegionWisePacking->m_rwpTopGuardBandHeight[i] = m_pcCfg->getRwpSEIRwpTopGuardBandHeight(i); + seiRegionWisePacking->m_rwpBottomGuardBandHeight[i] = m_pcCfg->getRwpSEIRwpBottomGuardBandHeight(i); + seiRegionWisePacking->m_rwpGuardBandNotUsedForPredFlag[i] = m_pcCfg->getRwpSEIRwpGuardBandNotUsedForPredFlag(i); + for( int j=0; j < 4; j++ ) { - for(int i=0; i<num;i++) - { - seiToneMappingInfo->m_startOfCodedInterval[i] = ptmp[i]; - } + seiRegionWisePacking->m_rwpGuardBandType[i*4 + j] = m_pcCfg->getRwpSEIRwpGuardBandType(i*4 + j); } - break; } - case 3: + } +} + +void SEIEncoder::initSEIGcmp(SEIGeneralizedCubemapProjection* seiGeneralizedCubemapProjection) +{ + CHECK(!(m_isInitialized), "seiGeneralizedCubemapProjection already initialized"); + CHECK(!(seiGeneralizedCubemapProjection != nullptr), "Need a 
seiGeneralizedCubemapProjection for initialization (got nullptr)"); + + seiGeneralizedCubemapProjection->m_gcmpCancelFlag = m_pcCfg->getGcmpSEICancelFlag(); + if (!seiGeneralizedCubemapProjection->m_gcmpCancelFlag) + { + seiGeneralizedCubemapProjection->m_gcmpPersistenceFlag = m_pcCfg->getGcmpSEIPersistenceFlag(); + seiGeneralizedCubemapProjection->m_gcmpPackingType = m_pcCfg->getGcmpSEIPackingType(); + seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType = m_pcCfg->getGcmpSEIMappingFunctionType(); + + int numFace = seiGeneralizedCubemapProjection->m_gcmpPackingType == 4 || seiGeneralizedCubemapProjection->m_gcmpPackingType == 5 ? 5 : 6; + seiGeneralizedCubemapProjection->m_gcmpFaceIndex.resize(numFace); + seiGeneralizedCubemapProjection->m_gcmpFaceRotation.resize(numFace); + if (seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType == 2) { - seiToneMappingInfo->m_numPivots = m_pcCfg->getTMISEINumPivots(); - seiToneMappingInfo->m_codedPivotValue.resize(seiToneMappingInfo->m_numPivots); - seiToneMappingInfo->m_targetPivotValue.resize(seiToneMappingInfo->m_numPivots); - int* ptmpcoded = m_pcCfg->getTMISEICodedPivotValue(); - int* ptmptarget = m_pcCfg->getTMISEITargetPivotValue(); - if(ptmpcoded&&ptmptarget) - { - for(int i=0; i<(seiToneMappingInfo->m_numPivots);i++) - { - seiToneMappingInfo->m_codedPivotValue[i]=ptmpcoded[i]; - seiToneMappingInfo->m_targetPivotValue[i]=ptmptarget[i]; - } - } - break; + seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffU.resize(numFace); + seiGeneralizedCubemapProjection->m_gcmpFunctionUAffectedByVFlag.resize(numFace); + seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffV.resize(numFace); + seiGeneralizedCubemapProjection->m_gcmpFunctionVAffectedByUFlag.resize(numFace); } - case 4: + for (int i = 0; i < numFace; i++) { - seiToneMappingInfo->m_cameraIsoSpeedIdc = m_pcCfg->getTMISEICameraIsoSpeedIdc(); - seiToneMappingInfo->m_cameraIsoSpeedValue = m_pcCfg->getTMISEICameraIsoSpeedValue(); - CHECK(!( 
seiToneMappingInfo->m_cameraIsoSpeedValue !=0 ), "Unspecified error"); - seiToneMappingInfo->m_exposureIndexIdc = m_pcCfg->getTMISEIExposurIndexIdc(); - seiToneMappingInfo->m_exposureIndexValue = m_pcCfg->getTMISEIExposurIndexValue(); - CHECK(!( seiToneMappingInfo->m_exposureIndexValue !=0 ), "Unspecified error"); - seiToneMappingInfo->m_exposureCompensationValueSignFlag = m_pcCfg->getTMISEIExposureCompensationValueSignFlag(); - seiToneMappingInfo->m_exposureCompensationValueNumerator = m_pcCfg->getTMISEIExposureCompensationValueNumerator(); - seiToneMappingInfo->m_exposureCompensationValueDenomIdc = m_pcCfg->getTMISEIExposureCompensationValueDenomIdc(); - seiToneMappingInfo->m_refScreenLuminanceWhite = m_pcCfg->getTMISEIRefScreenLuminanceWhite(); - seiToneMappingInfo->m_extendedRangeWhiteLevel = m_pcCfg->getTMISEIExtendedRangeWhiteLevel(); - CHECK(!( seiToneMappingInfo->m_extendedRangeWhiteLevel >= 100 ), "Unspecified error"); - seiToneMappingInfo->m_nominalBlackLevelLumaCodeValue = m_pcCfg->getTMISEINominalBlackLevelLumaCodeValue(); - seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue = m_pcCfg->getTMISEINominalWhiteLevelLumaCodeValue(); - CHECK(!( seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue > seiToneMappingInfo->m_nominalBlackLevelLumaCodeValue ), "Unspecified error"); - seiToneMappingInfo->m_extendedWhiteLevelLumaCodeValue = m_pcCfg->getTMISEIExtendedWhiteLevelLumaCodeValue(); - CHECK(!( seiToneMappingInfo->m_extendedWhiteLevelLumaCodeValue >= seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue ), "Unspecified error"); - break; + seiGeneralizedCubemapProjection->m_gcmpFaceIndex[i] = m_pcCfg->getGcmpSEIFaceIndex(i); + seiGeneralizedCubemapProjection->m_gcmpFaceRotation[i] = m_pcCfg->getGcmpSEIFaceRotation(i); + if (seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType == 2) + { + seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffU[i] = std::max<uint8_t>(1, (uint8_t)(128.0 * m_pcCfg->getGcmpSEIFunctionCoeffU(i) + 0.5)) - 1; + 
seiGeneralizedCubemapProjection->m_gcmpFunctionUAffectedByVFlag[i] = m_pcCfg->getGcmpSEIFunctionUAffectedByVFlag(i); + seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffV[i] = std::max<uint8_t>(1, (uint8_t)(128.0 * m_pcCfg->getGcmpSEIFunctionCoeffV(i) + 0.5)) - 1; + seiGeneralizedCubemapProjection->m_gcmpFunctionVAffectedByUFlag[i] = m_pcCfg->getGcmpSEIFunctionVAffectedByUFlag(i); + } } - default: + + seiGeneralizedCubemapProjection->m_gcmpGuardBandFlag = m_pcCfg->getGcmpSEIGuardBandFlag(); + if (seiGeneralizedCubemapProjection->m_gcmpGuardBandFlag) { - CHECK(!(!"Undefined SEIToneMapModelId"), "Unspecified error"); - break; + seiGeneralizedCubemapProjection->m_gcmpGuardBandBoundaryType = m_pcCfg->getGcmpSEIGuardBandBoundaryType(); + seiGeneralizedCubemapProjection->m_gcmpGuardBandSamplesMinus1 = m_pcCfg->getGcmpSEIGuardBandSamplesMinus1(); } } } -void SEIEncoder::initSEISOPDescription(SEISOPDescription *sopDescriptionSEI, Slice *slice, int picInGOP, int lastIdr, int currGOPSize) +void SEIEncoder::initSEISampleAspectRatioInfo(SEISampleAspectRatioInfo* seiSampleAspectRatioInfo) { - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(sopDescriptionSEI != NULL), "Unspecified error"); - CHECK(!(slice != NULL), "Unspecified error"); - - int sopCurrPOC = slice->getPOC(); - sopDescriptionSEI->m_sopSeqParameterSetId = slice->getSPS()->getSPSId(); + CHECK(!(m_isInitialized), "seiSampleAspectRatioInfo already initialized"); + CHECK(!(seiSampleAspectRatioInfo != nullptr), "Need a seiSampleAspectRatioInfo for initialization (got nullptr)"); - int i = 0; - int prevEntryId = picInGOP; - for (int j = picInGOP; j < currGOPSize; j++) + seiSampleAspectRatioInfo->m_sariCancelFlag = m_pcCfg->getSariCancelFlag(); + if (!seiSampleAspectRatioInfo->m_sariCancelFlag) { - int deltaPOC = m_pcCfg->getGOPEntry(j).m_POC - m_pcCfg->getGOPEntry(prevEntryId).m_POC; - if ((sopCurrPOC + deltaPOC) < m_pcCfg->getFramesToBeEncoded()) + seiSampleAspectRatioInfo->m_sariPersistenceFlag = 
m_pcCfg->getSariPersistenceFlag(); + seiSampleAspectRatioInfo->m_sariAspectRatioIdc = m_pcCfg->getSariAspectRatioIdc(); + if (seiSampleAspectRatioInfo->m_sariAspectRatioIdc == 255) + { + seiSampleAspectRatioInfo->m_sariSarWidth = m_pcCfg->getSariSarWidth(); + seiSampleAspectRatioInfo->m_sariSarHeight = m_pcCfg->getSariSarHeight(); + } + else { - sopCurrPOC += deltaPOC; - sopDescriptionSEI->m_sopDescVclNaluType[i] = m_pcEncGOP->getNalUnitType(sopCurrPOC, lastIdr, slice->getPic()->fieldPic); - sopDescriptionSEI->m_sopDescTemporalId[i] = m_pcCfg->getGOPEntry(j).m_temporalId; - sopDescriptionSEI->m_sopDescStRpsIdx[i] = m_pcEncLib->getReferencePictureSetIdxForSOP(sopCurrPOC, j); - sopDescriptionSEI->m_sopDescPocDelta[i] = deltaPOC; - - prevEntryId = j; - i++; + seiSampleAspectRatioInfo->m_sariSarWidth = 0; + seiSampleAspectRatioInfo->m_sariSarHeight = 0; } } - - sopDescriptionSEI->m_numPicsInSopMinus1 = i - 1; } -void SEIEncoder::initSEIBufferingPeriod(SEIBufferingPeriod *bufferingPeriodSEI, Slice *slice) -{ - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(bufferingPeriodSEI != NULL), "Unspecified error"); - CHECK(!(slice != NULL), "Unspecified error"); - - uint32_t uiInitialCpbRemovalDelay = (90000/2); // 0.5 sec - bufferingPeriodSEI->m_initialCpbRemovalDelay [0][0] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialCpbRemovalDelayOffset[0][0] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialCpbRemovalDelay [0][1] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialCpbRemovalDelayOffset[0][1] = uiInitialCpbRemovalDelay; - - double dTmp = (double)slice->getSPS()->getVuiParameters()->getTimingInfo()->getNumUnitsInTick() / (double)slice->getSPS()->getVuiParameters()->getTimingInfo()->getTimeScale(); - - uint32_t uiTmp = (uint32_t)( dTmp * 90000.0 ); - uiInitialCpbRemovalDelay -= uiTmp; - uiInitialCpbRemovalDelay -= uiTmp / ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getTickDivisorMinus2() + 2 ); - 
bufferingPeriodSEI->m_initialAltCpbRemovalDelay [0][0] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialAltCpbRemovalDelayOffset[0][0] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialAltCpbRemovalDelay [0][1] = uiInitialCpbRemovalDelay; - bufferingPeriodSEI->m_initialAltCpbRemovalDelayOffset[0][1] = uiInitialCpbRemovalDelay; - - bufferingPeriodSEI->m_rapCpbParamsPresentFlag = 0; - //for the concatenation, it can be set to one during splicing. - bufferingPeriodSEI->m_concatenationFlag = 0; - //since the temporal layer HRD is not ready, we assumed it is fixed - bufferingPeriodSEI->m_auCpbRemovalDelayDelta = 1; - bufferingPeriodSEI->m_cpbDelayOffset = 0; - bufferingPeriodSEI->m_dpbDelayOffset = 0; -} +#if HEVC_SEI //! initialize scalable nesting SEI message. //! Note: The SEI message structures input into this function will become part of the scalable nesting SEI and will be //! automatically freed, when the nesting SEI is disposed. @@ -298,7 +405,7 @@ void SEIEncoder::initSEIRecoveryPoint(SEIRecoveryPoint *recoveryPointSEI, Slice recoveryPointSEI->m_exactMatchingFlag = ( slice->getPOC() == 0 ) ? (true) : (false); recoveryPointSEI->m_brokenLinkFlag = false; } - +#endif //! 
calculate hashes for entire reconstructed picture void SEIEncoder::initDecodedPictureHashSEI(SEIDecodedPictureHash *decodedPictureHashSEI, PelUnitBuf& pic, std::string &rHashString, const BitDepths &bitDepths) @@ -331,6 +438,13 @@ void SEIEncoder::initDecodedPictureHashSEI(SEIDecodedPictureHash *decodedPicture } } +void SEIEncoder::initSEIDependentRAPIndication(SEIDependentRAPIndication *seiDependentRAPIndication) +{ + CHECK(!(m_isInitialized), "Unspecified error"); + CHECK(!(seiDependentRAPIndication!=NULL), "Unspecified error"); +} + +#if HEVC_SEI void SEIEncoder::initTemporalLevel0IndexSEI(SEITemporalLevel0Index *temporalLevel0IndexSEI, Slice *slice) { CHECK(!(m_isInitialized), "Unspecified error"); @@ -350,14 +464,13 @@ void SEIEncoder::initTemporalLevel0IndexSEI(SEITemporalLevel0Index *temporalLeve temporalLevel0IndexSEI->rapIdx = m_rapIdx; } -#if HEVC_TILES_WPP void SEIEncoder::initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedTileSets *sei, const PPS *pps) { CHECK(!(m_isInitialized), "Unspecified error"); CHECK(!(sei!=NULL), "Unspecified error"); CHECK(!(pps!=NULL), "Unspecified error"); - if(pps->getTilesEnabledFlag()) + if(!pps->getSingleTileInPicFlag()) { if (m_pcCfg->getMCTSEncConstraint()) { @@ -395,7 +508,6 @@ void SEIEncoder::initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedT CHECK(!(!"Tile is not enabled"), "Unspecified error"); } } -#endif void SEIEncoder::initSEIKneeFunctionInfo(SEIKneeFunctionInfo *seiKneeFunctionInfo) { @@ -427,6 +539,7 @@ void SEIEncoder::initSEIKneeFunctionInfo(SEIKneeFunctionInfo *seiKneeFunctionInf } } } +#endif template <typename T> static void readTokenValue(T &returnedValue, /// value returned @@ -496,204 +609,117 @@ static void readTokenValueAndValidate(T &returnedValue, /// value ret } } -// bool version does not have maximum and minimum values. 
-static void readTokenValueAndValidate(bool &returnedValue, /// value returned - bool &failed, /// used and updated - std::istream &is, /// stream to read token from - const char *pToken) /// token string +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI +void SEIEncoder::initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *seiAltTransCharacteristics) { - readTokenValue(returnedValue, failed, is, pToken); + CHECK(!(m_isInitialized), "Unspecified error"); + CHECK(!(seiAltTransCharacteristics!=NULL), "Unspecified error"); + // Set SEI message parameters read from command line options + seiAltTransCharacteristics->m_preferredTransferCharacteristics = m_pcCfg->getSEIPreferredTransferCharacteristics(); } - -bool SEIEncoder::initSEIColourRemappingInfo(SEIColourRemappingInfo* seiColourRemappingInfo, int currPOC) // returns true on success, false on failure. +#endif +void SEIEncoder::initSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics *seiFilmGrain) { CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiColourRemappingInfo!=NULL), "Unspecified error"); - - // reading external Colour Remapping Information SEI message parameters from file - if( !m_pcCfg->getColourRemapInfoSEIFileRoot().empty()) + CHECK(!(seiFilmGrain != NULL), "Unspecified error"); + // Set SEI message parameters read from command line options + seiFilmGrain->m_filmGrainCharacteristicsCancelFlag = m_pcCfg->getFilmGrainCharactersticsSEICancelFlag(); + seiFilmGrain->m_filmGrainCharacteristicsPersistenceFlag = m_pcCfg->getFilmGrainCharactersticsSEIPersistenceFlag(); + seiFilmGrain->m_filmGrainModelId = m_pcCfg->getFilmGrainCharactersticsSEIModelID(); + seiFilmGrain->m_separateColourDescriptionPresentFlag = m_pcCfg->getFilmGrainCharactersticsSEISepColourDescPresent(); + seiFilmGrain->m_blendingModeId = m_pcCfg->getFilmGrainCharactersticsSEIBlendingModeID(); + seiFilmGrain->m_log2ScaleFactor = m_pcCfg->getFilmGrainCharactersticsSEILog2ScaleFactor(); + for (int i = 0; 
i < MAX_NUM_COMPONENT; i++) { - bool failed=false; - - // building the CRI file name with poc num in prefix "_poc.txt" - std::string colourRemapSEIFileWithPoc(m_pcCfg->getColourRemapInfoSEIFileRoot()); - { - std::stringstream suffix; - suffix << "_" << currPOC << ".txt"; - colourRemapSEIFileWithPoc+=suffix.str(); - } - - std::ifstream fic(colourRemapSEIFileWithPoc.c_str()); - if (!fic.good() || !fic.is_open()) - { - std::cerr << "No Colour Remapping Information SEI parameters file " << colourRemapSEIFileWithPoc << " for POC " << currPOC << std::endl; - return false; - } - - // TODO: identify and remove duplication with decoder parsing through abstraction. - - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapId, failed, fic, "colour_remap_id", uint32_t(0), uint32_t(0x7fffffff) ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapCancelFlag, failed, fic, "colour_remap_cancel_flag" ); - if( !seiColourRemappingInfo->m_colourRemapCancelFlag ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapPersistenceFlag, failed, fic, "colour_remap_persistence_flag" ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapVideoSignalInfoPresentFlag, failed, fic, "colour_remap_video_signal_info_present_flag"); - if( seiColourRemappingInfo->m_colourRemapVideoSignalInfoPresentFlag ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapFullRangeFlag, failed, fic, "colour_remap_full_range_flag" ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapPrimaries, failed, fic, "colour_remap_primaries", int(0), int(255) ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapTransferFunction, failed, fic, "colour_remap_transfer_function", int(0), int(255) ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapMatrixCoefficients, failed, fic, "colour_remap_matrix_coefficients", int(0), int(255) ); - } - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapInputBitDepth, 
failed, fic, "colour_remap_input_bit_depth", int(8), int(16) ); - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapBitDepth, failed, fic, "colour_remap_bit_depth", int(8), int(16) ); - - const int maximumInputValue = (1 << (((seiColourRemappingInfo->m_colourRemapInputBitDepth + 7) >> 3) << 3)) - 1; - const int maximumRemappedValue = (1 << (((seiColourRemappingInfo->m_colourRemapBitDepth + 7) >> 3) << 3)) - 1; - - for( int c=0 ; c<3 ; c++ ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_preLutNumValMinus1[c], failed, fic, "pre_lut_num_val_minus1[c]", int(0), int(32) ); - if( seiColourRemappingInfo->m_preLutNumValMinus1[c]>0 ) - { - seiColourRemappingInfo->m_preLut[c].resize(seiColourRemappingInfo->m_preLutNumValMinus1[c]+1); - for( int i=0 ; i<=seiColourRemappingInfo->m_preLutNumValMinus1[c] ; i++ ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_preLut[c][i].codedValue, failed, fic, "pre_lut_coded_value[c][i]", int(0), maximumInputValue ); - readTokenValueAndValidate(seiColourRemappingInfo->m_preLut[c][i].targetValue, failed, fic, "pre_lut_target_value[c][i]", int(0), maximumRemappedValue ); - } - } - } - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapMatrixPresentFlag, failed, fic, "colour_remap_matrix_present_flag" ); - if( seiColourRemappingInfo->m_colourRemapMatrixPresentFlag ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_log2MatrixDenom, failed, fic, "log2_matrix_denom", int(0), int(15) ); - for( int c=0 ; c<3 ; c++ ) - { - for( int i=0 ; i<3 ; i++ ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapCoeffs[c][i], failed, fic, "colour_remap_coeffs[c][i]", -32768, 32767 ); - } - } - } - for( int c=0 ; c<3 ; c++ ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_postLutNumValMinus1[c], failed, fic, "post_lut_num_val_minus1[c]", int(0), int(32) ); - if( seiColourRemappingInfo->m_postLutNumValMinus1[c]>0 ) - { - 
seiColourRemappingInfo->m_postLut[c].resize(seiColourRemappingInfo->m_postLutNumValMinus1[c]+1); - for( int i=0 ; i<=seiColourRemappingInfo->m_postLutNumValMinus1[c] ; i++ ) - { - readTokenValueAndValidate(seiColourRemappingInfo->m_postLut[c][i].codedValue, failed, fic, "post_lut_coded_value[c][i]", int(0), maximumRemappedValue ); - readTokenValueAndValidate(seiColourRemappingInfo->m_postLut[c][i].targetValue, failed, fic, "post_lut_target_value[c][i]", int(0), maximumRemappedValue ); - } - } - } - } + seiFilmGrain->m_compModel[i].presentFlag = m_pcCfg->getFGCSEICompModelPresent(i); + } +} - if( failed ) +void SEIEncoder::initSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume *seiMDCV) +{ + CHECK(!(m_isInitialized), "Unspecified error"); + CHECK(!(seiMDCV != NULL), "Unspecified error"); + // Set SEI message parameters read from command line options + for (int j = 0; j <= 1; j++) + { + for (int i = 0; i <= 2; i++) { - EXIT( "Error while reading Colour Remapping Information SEI parameters file '" << colourRemapSEIFileWithPoc << "'" ); + seiMDCV->values.primaries[i][j] = m_pcCfg->getMasteringDisplaySEI().primaries[i][j]; } + seiMDCV->values.whitePoint[j] = m_pcCfg->getMasteringDisplaySEI().whitePoint[j]; } - return true; + seiMDCV->values.maxLuminance = m_pcCfg->getMasteringDisplaySEI().maxLuminance; + seiMDCV->values.minLuminance = m_pcCfg->getMasteringDisplaySEI().minLuminance; } -void SEIEncoder::initSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint *seiChromaResamplingFilterHint, int iHorFilterIndex, int iVerFilterIndex) +void SEIEncoder::initSEIContentLightLevel(SEIContentLightLevelInfo *seiCLL) { CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiChromaResamplingFilterHint!=NULL), "Unspecified error"); + CHECK(!(seiCLL != NULL), "Unspecified error"); + // Set SEI message parameters read from command line options + seiCLL->m_maxContentLightLevel = m_pcCfg->getCLLSEIMaxContentLightLevel(); + seiCLL->m_maxPicAverageLightLevel = 
m_pcCfg->getCLLSEIMaxPicAvgLightLevel(); +} - seiChromaResamplingFilterHint->m_verChromaFilterIdc = iVerFilterIndex; - seiChromaResamplingFilterHint->m_horChromaFilterIdc = iHorFilterIndex; - seiChromaResamplingFilterHint->m_verFilteringFieldProcessingFlag = 1; - seiChromaResamplingFilterHint->m_targetFormatIdc = 3; - seiChromaResamplingFilterHint->m_perfectReconstructionFlag = false; +void SEIEncoder::initSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment *seiAmbViewEnvironment) +{ + CHECK(!(m_isInitialized), "Unspecified error"); + CHECK(!(seiAmbViewEnvironment != NULL), "Unspecified error"); + // Set SEI message parameters read from command line options + seiAmbViewEnvironment->m_ambientIlluminance = m_pcCfg->getAmbientViewingEnvironmentSEIIlluminance(); + seiAmbViewEnvironment->m_ambientLightX = m_pcCfg->getAmbientViewingEnvironmentSEIAmbientLightX(); + seiAmbViewEnvironment->m_ambientLightY = m_pcCfg->getAmbientViewingEnvironmentSEIAmbientLightY(); +} - // this creates some example filter values, if explicit filter definition is selected - if (seiChromaResamplingFilterHint->m_verChromaFilterIdc == 1) +void SEIEncoder::initSEIContentColourVolume(SEIContentColourVolume *seiContentColourVolume) +{ + assert(m_isInitialized); + assert(seiContentColourVolume != NULL); + seiContentColourVolume->m_ccvCancelFlag = m_pcCfg->getCcvSEICancelFlag(); + seiContentColourVolume->m_ccvPersistenceFlag = m_pcCfg->getCcvSEIPersistenceFlag(); + + seiContentColourVolume->m_ccvPrimariesPresentFlag = m_pcCfg->getCcvSEIPrimariesPresentFlag(); + seiContentColourVolume->m_ccvMinLuminanceValuePresentFlag = m_pcCfg->getCcvSEIMinLuminanceValuePresentFlag(); + seiContentColourVolume->m_ccvMaxLuminanceValuePresentFlag = m_pcCfg->getCcvSEIMaxLuminanceValuePresentFlag(); + seiContentColourVolume->m_ccvAvgLuminanceValuePresentFlag = m_pcCfg->getCcvSEIAvgLuminanceValuePresentFlag(); + + // Currently we are using a floor operation for setting up the "integer" values for this SEI. 
+ // This applies to both primaries and luminance limits. + if (seiContentColourVolume->m_ccvPrimariesPresentFlag == true) { - const int numVerticalFilters = 3; - const int verTapLengthMinus1[] = {5,3,3}; - - seiChromaResamplingFilterHint->m_verFilterCoeff.resize(numVerticalFilters); - for(int i = 0; i < numVerticalFilters; i ++) + for (int i = 0; i < MAX_NUM_COMPONENT; i++) { - seiChromaResamplingFilterHint->m_verFilterCoeff[i].resize(verTapLengthMinus1[i]+1); + seiContentColourVolume->m_ccvPrimariesX[i] = (int32_t)(50000.0 * m_pcCfg->getCcvSEIPrimariesX(i)); + seiContentColourVolume->m_ccvPrimariesY[i] = (int32_t)(50000.0 * m_pcCfg->getCcvSEIPrimariesY(i)); } - // Note: C++11 -> seiChromaResamplingFilterHint->m_verFilterCoeff[0] = {-3,13,31,23,3,-3}; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][0] = -3; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][1] = 13; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][2] = 31; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][3] = 23; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][4] = 3; - seiChromaResamplingFilterHint->m_verFilterCoeff[0][5] = -3; - - seiChromaResamplingFilterHint->m_verFilterCoeff[1][0] = -1; - seiChromaResamplingFilterHint->m_verFilterCoeff[1][1] = 25; - seiChromaResamplingFilterHint->m_verFilterCoeff[1][2] = 247; - seiChromaResamplingFilterHint->m_verFilterCoeff[1][3] = -15; - - seiChromaResamplingFilterHint->m_verFilterCoeff[2][0] = -20; - seiChromaResamplingFilterHint->m_verFilterCoeff[2][1] = 186; - seiChromaResamplingFilterHint->m_verFilterCoeff[2][2] = 100; - seiChromaResamplingFilterHint->m_verFilterCoeff[2][3] = -10; - } - else - { - seiChromaResamplingFilterHint->m_verFilterCoeff.resize(0); } - if (seiChromaResamplingFilterHint->m_horChromaFilterIdc == 1) + if (seiContentColourVolume->m_ccvMinLuminanceValuePresentFlag == true) { - int const numHorizontalFilters = 1; - const int horTapLengthMinus1[] = {3}; - - 
seiChromaResamplingFilterHint->m_horFilterCoeff.resize(numHorizontalFilters); - for(int i = 0; i < numHorizontalFilters; i ++) - { - seiChromaResamplingFilterHint->m_horFilterCoeff[i].resize(horTapLengthMinus1[i]+1); - } - seiChromaResamplingFilterHint->m_horFilterCoeff[0][0] = 1; - seiChromaResamplingFilterHint->m_horFilterCoeff[0][1] = 6; - seiChromaResamplingFilterHint->m_horFilterCoeff[0][2] = 1; + seiContentColourVolume->m_ccvMinLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIMinLuminanceValue()); } - else + if (seiContentColourVolume->m_ccvMaxLuminanceValuePresentFlag == true) { - seiChromaResamplingFilterHint->m_horFilterCoeff.resize(0); + seiContentColourVolume->m_ccvMaxLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIMaxLuminanceValue()); } -} - -void SEIEncoder::initSEITimeCode(SEITimeCode *seiTimeCode) -{ - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiTimeCode!=NULL), "Unspecified error"); - // Set data as per command line options - seiTimeCode->numClockTs = m_pcCfg->getNumberOfTimesets(); - for(int i = 0; i < seiTimeCode->numClockTs; i++) + if (seiContentColourVolume->m_ccvAvgLuminanceValuePresentFlag == true) { - seiTimeCode->timeSetArray[i] = m_pcCfg->getTimeSet(i); + seiContentColourVolume->m_ccvAvgLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIAvgLuminanceValue()); } } - -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI -void SEIEncoder::initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *seiAltTransCharacteristics) -{ - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiAltTransCharacteristics!=NULL), "Unspecified error"); - // Set SEI message parameters read from command line options - seiAltTransCharacteristics->m_preferredTransferCharacteristics = m_pcCfg->getSEIPreferredTransferCharacteristics(); -} -#endif - -void SEIEncoder::initSEIGreenMetadataInfo(SEIGreenMetadataInfo *seiGreenMetadataInfo, uint32_t u) +void 
SEIEncoder::initSEISubpictureLevelInfo(SEISubpicureLevelInfo *sei, const SPS *sps) { - CHECK(!(m_isInitialized), "Unspecified error"); - CHECK(!(seiGreenMetadataInfo!=NULL), "Unspecified error"); - - seiGreenMetadataInfo->m_greenMetadataType = m_pcCfg->getSEIGreenMetadataType(); - seiGreenMetadataInfo->m_xsdMetricType = m_pcCfg->getSEIXSDMetricType(); - seiGreenMetadataInfo->m_xsdMetricValue = u; + // subpicture level information should be specified via config file + // unfortunately the implementation of subpictures is still not available + // TODO: implement config file parameters and intialization + fprintf(stderr, "SEISubpicureLevelInfo depends on subpictures! Initializing to dummy values!\n"); + + sei->m_sliSeqParameterSetId = sps->getSPSId(); + sei->m_numRefLevels = 2; + sei->m_refLevelIdc.resize(2); + sei->m_refLevelIdc[0] = Level::LEVEL4; + sei->m_refLevelIdc[1] = Level::LEVEL8_5; + sei->m_explicitFractionPresentFlag = false; } diff --git a/source/Lib/EncoderLib/SEIEncoder.h b/source/Lib/EncoderLib/SEIEncoder.h index b4058a56855213ff2ada2e3a9f26e02989420566..dcb9730f8aa1daea730d111b226447024a44a807 100644 --- a/source/Lib/EncoderLib/SEIEncoder.h +++ b/source/Lib/EncoderLib/SEIEncoder.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -52,9 +52,11 @@ public: :m_pcCfg(NULL) ,m_pcEncLib(NULL) ,m_pcEncGOP(NULL) +#if HEVC_SEI ,m_tl0Idx(0) ,m_rapIdx(0) - ,m_isInitialized(false) +#endif + ,m_isInitialized(false) {}; virtual ~SEIEncoder(){}; @@ -67,44 +69,40 @@ public: }; // leading SEIs -#if HEVC_VPS - void initSEIActiveParameterSets (SEIActiveParameterSets *sei, const VPS *vps, const SPS *sps); -#else - void initSEIActiveParameterSets (SEIActiveParameterSets *sei, const SPS *sps); -#endif void initSEIFramePacking(SEIFramePacking *sei, int currPicNum); - void initSEIDisplayOrientation(SEIDisplayOrientation *sei); - void initSEIToneMappingInfo(SEIToneMappingInfo *sei); - void initSEISOPDescription(SEISOPDescription *sei, Slice *slice, int picInGOP, int lastIdr, int currGOPSize); - void initSEIBufferingPeriod(SEIBufferingPeriod *sei, Slice *slice); - void initSEIScalableNesting(SEIScalableNesting *sei, SEIMessages &nestedSEIs); - void initSEIRecoveryPoint(SEIRecoveryPoint *sei, Slice *slice); - void initSEISegmentedRectFramePacking(SEISegmentedRectFramePacking *sei); -#if HEVC_TILES_WPP - void initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedTileSets *sei, const PPS *pps); -#endif - void initSEIKneeFunctionInfo(SEIKneeFunctionInfo *sei); - void initSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint *sei, int iHorFilterIndex, int iVerFilterIndex); - void initSEITimeCode(SEITimeCode *sei); - bool initSEIColourRemappingInfo(SEIColourRemappingInfo *sei, int currPOC); // returns true on success, false on failure. 
+ void initSEIDependentRAPIndication(SEIDependentRAPIndication *sei); + void initSEIBufferingPeriod(SEIBufferingPeriod *sei, bool noLeadingPictures); #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI void initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *sei); #endif - // trailing SEIs void initDecodedPictureHashSEI(SEIDecodedPictureHash *sei, PelUnitBuf& pic, std::string &rHashString, const BitDepths &bitDepths); +#if HEVC_SEI void initTemporalLevel0IndexSEI(SEITemporalLevel0Index *sei, Slice *slice); void initSEIGreenMetadataInfo(SEIGreenMetadataInfo *sei, uint32_t u); - +#endif + void initSEIErp(SEIEquirectangularProjection *sei); + void initSEISphereRotation(SEISphereRotation *sei); + void initSEIOmniViewport(SEIOmniViewport *sei); + void initSEIRegionWisePacking(SEIRegionWisePacking *sei); + void initSEIGcmp(SEIGeneralizedCubemapProjection *sei); + void initSEISubpictureLevelInfo(SEISubpicureLevelInfo *sei, const SPS *sps); + void initSEISampleAspectRatioInfo(SEISampleAspectRatioInfo *sei); + void initSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics *sei); + void initSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume *sei); + void initSEIContentLightLevel(SEIContentLightLevelInfo *sei); + void initSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment *sei); + void initSEIContentColourVolume(SEIContentColourVolume *sei); private: EncCfg* m_pcCfg; EncLib* m_pcEncLib; EncGOP* m_pcEncGOP; +#if HEVC_SEI // for temporal level 0 index SEI uint32_t m_tl0Idx; uint32_t m_rapIdx; - +#endif bool m_isInitialized; }; diff --git a/source/Lib/EncoderLib/SEIwrite.cpp b/source/Lib/EncoderLib/SEIwrite.cpp index f6d7c78adde76064015434952a8fa10074876164..67f80ebcd343b1ccc0213628b09f410f54269666 100644 --- a/source/Lib/EncoderLib/SEIwrite.cpp +++ b/source/Lib/EncoderLib/SEIwrite.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. 
* - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,85 +41,85 @@ //! \ingroup EncoderLib //! \{ -void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps) +void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps, HRD &hrd, const uint32_t temporalId) { + const SEIBufferingPeriod *bp = NULL; switch (sei.payloadType()) { case SEI::USER_DATA_UNREGISTERED: xWriteSEIuserDataUnregistered(*static_cast<const SEIuserDataUnregistered*>(&sei)); break; - case SEI::ACTIVE_PARAMETER_SETS: - xWriteSEIActiveParameterSets(*static_cast<const SEIActiveParameterSets*>(& sei)); - break; case SEI::DECODING_UNIT_INFO: - xWriteSEIDecodingUnitInfo(*static_cast<const SEIDecodingUnitInfo*>(& sei), sps); + bp = hrd.getBufferingPeriodSEI(); + CHECK (bp == nullptr, "Buffering Period need to be initialized in HRD to allow writing of Decoding Unit Information SEI"); + xWriteSEIDecodingUnitInfo(*static_cast<const SEIDecodingUnitInfo*>(& sei), *bp, temporalId); break; case SEI::DECODED_PICTURE_HASH: xWriteSEIDecodedPictureHash(*static_cast<const SEIDecodedPictureHash*>(&sei)); break; case SEI::BUFFERING_PERIOD: - xWriteSEIBufferingPeriod(*static_cast<const SEIBufferingPeriod*>(&sei), sps); + xWriteSEIBufferingPeriod(*static_cast<const SEIBufferingPeriod*>(&sei)); + hrd.setBufferingPeriodSEI(static_cast<const SEIBufferingPeriod*>(&sei)); break; case SEI::PICTURE_TIMING: - xWriteSEIPictureTiming(*static_cast<const SEIPictureTiming*>(&sei), sps); + { + bp = hrd.getBufferingPeriodSEI(); + CHECK (bp == nullptr, "Buffering Period need to be initialized in HRD to allow writing of Picture Timing SEI"); + xWriteSEIPictureTiming(*static_cast<const SEIPictureTiming*>(&sei), *bp, temporalId); + } + break; + case SEI::FRAME_FIELD_INFO: + xWriteSEIFrameFieldInfo(*static_cast<const SEIFrameFieldInfo*>(&sei)); break; - 
case SEI::RECOVERY_POINT: - xWriteSEIRecoveryPoint(*static_cast<const SEIRecoveryPoint*>(&sei)); + case SEI::DEPENDENT_RAP_INDICATION: + xWriteSEIDependentRAPIndication(*static_cast<const SEIDependentRAPIndication*>(&sei)); break; case SEI::FRAME_PACKING: xWriteSEIFramePacking(*static_cast<const SEIFramePacking*>(&sei)); break; - case SEI::SEGM_RECT_FRAME_PACKING: - xWriteSEISegmentedRectFramePacking(*static_cast<const SEISegmentedRectFramePacking*>(&sei)); - break; - case SEI::DISPLAY_ORIENTATION: - xWriteSEIDisplayOrientation(*static_cast<const SEIDisplayOrientation*>(&sei)); - break; - case SEI::TEMPORAL_LEVEL0_INDEX: - xWriteSEITemporalLevel0Index(*static_cast<const SEITemporalLevel0Index*>(&sei)); + case SEI::MASTERING_DISPLAY_COLOUR_VOLUME: + xWriteSEIMasteringDisplayColourVolume(*static_cast<const SEIMasteringDisplayColourVolume*>(&sei)); break; - case SEI::REGION_REFRESH_INFO: - xWriteSEIGradualDecodingRefreshInfo(*static_cast<const SEIGradualDecodingRefreshInfo*>(&sei)); +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI + case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: + xWriteSEIAlternativeTransferCharacteristics(*static_cast<const SEIAlternativeTransferCharacteristics*>(&sei)); break; - case SEI::NO_DISPLAY: - xWriteSEINoDisplay(*static_cast<const SEINoDisplay*>(&sei)); +#endif + case SEI::EQUIRECTANGULAR_PROJECTION: + xWriteSEIEquirectangularProjection(*static_cast<const SEIEquirectangularProjection*>(&sei)); break; - case SEI::TONE_MAPPING_INFO: - xWriteSEIToneMappingInfo(*static_cast<const SEIToneMappingInfo*>(&sei)); + case SEI::SPHERE_ROTATION: + xWriteSEISphereRotation(*static_cast<const SEISphereRotation*>(&sei)); break; - case SEI::SOP_DESCRIPTION: - xWriteSEISOPDescription(*static_cast<const SEISOPDescription*>(&sei)); + case SEI::OMNI_VIEWPORT: + xWriteSEIOmniViewport(*static_cast<const SEIOmniViewport*>(&sei)); break; - case SEI::SCALABLE_NESTING: - xWriteSEIScalableNesting(bs, *static_cast<const SEIScalableNesting*>(&sei), sps); + case 
SEI::REGION_WISE_PACKING: + xWriteSEIRegionWisePacking(*static_cast<const SEIRegionWisePacking*>(&sei)); break; - case SEI::CHROMA_RESAMPLING_FILTER_HINT: - xWriteSEIChromaResamplingFilterHint(*static_cast<const SEIChromaResamplingFilterHint*>(&sei)); + case SEI::GENERALIZED_CUBEMAP_PROJECTION: + xWriteSEIGeneralizedCubemapProjection(*static_cast<const SEIGeneralizedCubemapProjection*>(&sei)); break; -#if HEVC_TILES_WPP - case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS: - xWriteSEITempMotionConstrainedTileSets(*static_cast<const SEITempMotionConstrainedTileSets*>(&sei)); + case SEI::USER_DATA_REGISTERED_ITU_T_T35: + xWriteSEIUserDataRegistered(*static_cast<const SEIUserDataRegistered*>(&sei)); break; -#endif - case SEI::TIME_CODE: - xWriteSEITimeCode(*static_cast<const SEITimeCode*>(&sei)); + case SEI::FILM_GRAIN_CHARACTERISTICS: + xWriteSEIFilmGrainCharacteristics(*static_cast<const SEIFilmGrainCharacteristics*>(&sei)); break; - case SEI::KNEE_FUNCTION_INFO: - xWriteSEIKneeFunctionInfo(*static_cast<const SEIKneeFunctionInfo*>(&sei)); + case SEI::CONTENT_LIGHT_LEVEL_INFO: + xWriteSEIContentLightLevelInfo(*static_cast<const SEIContentLightLevelInfo*>(&sei)); break; - case SEI::COLOUR_REMAPPING_INFO: - xWriteSEIColourRemappingInfo(*static_cast<const SEIColourRemappingInfo*>(&sei)); + case SEI::AMBIENT_VIEWING_ENVIRONMENT: + xWriteSEIAmbientViewingEnvironment(*static_cast<const SEIAmbientViewingEnvironment*>(&sei)); break; - case SEI::MASTERING_DISPLAY_COLOUR_VOLUME: - xWriteSEIMasteringDisplayColourVolume(*static_cast<const SEIMasteringDisplayColourVolume*>(&sei)); + case SEI::CONTENT_COLOUR_VOLUME: + xWriteSEIContentColourVolume(*static_cast<const SEIContentColourVolume*>(&sei)); break; -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI - case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: - xWriteSEIAlternativeTransferCharacteristics(*static_cast<const SEIAlternativeTransferCharacteristics*>(&sei)); + case SEI::SUBPICTURE_LEVEL_INFO: + 
xWriteSEISubpictureLevelInfo(*static_cast<const SEISubpicureLevelInfo*>(&sei), sps); break; -#endif - case SEI::GREEN_METADATA: - xWriteSEIGreenMetadataInfo(*static_cast<const SEIGreenMetadataInfo*>(&sei)); + case SEI::SAMPLE_ASPECT_RATIO_INFO: + xWriteSEISampleAspectRatioInfo(*static_cast<const SEISampleAspectRatioInfo*>(&sei)); break; default: THROW("Trying to write unhandled SEI message"); @@ -131,7 +131,7 @@ void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const /** * marshal all SEI messages in provided list into one bitstream bs */ -void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, bool isNested) +void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, HRD &hrd, bool isNested, const uint32_t temporalId) { #if ENABLE_TRACING if (g_HLSTraceEnable) @@ -151,7 +151,7 @@ void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList bool traceEnable = g_HLSTraceEnable; g_HLSTraceEnable = false; #endif - xWriteSEIpayloadData(bs_count, **sei, sps); + xWriteSEIpayloadData(bs_count, **sei, sps, hrd, temporalId); #if ENABLE_TRACING g_HLSTraceEnable = traceEnable; #endif @@ -179,7 +179,7 @@ void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList xTraceSEIMessageType((*sei)->payloadType()); #endif - xWriteSEIpayloadData(bs, **sei, sps); + xWriteSEIpayloadData(bs_count, **sei, sps, hrd, temporalId); } if (!isNested) { @@ -229,11 +229,9 @@ void SEIWriter::xWriteSEIDecodedPictureHash(const SEIDecodedPictureHash& sei) } } +#if HEVC_SEI void SEIWriter::xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei) { -#if HEVC_VPS - WRITE_CODE(sei.activeVPSId, 4, "active_video_parameter_set_id"); -#endif WRITE_FLAG(sei.m_selfContainedCvsFlag, "self_contained_cvs_flag"); WRITE_FLAG(sei.m_noParameterSetUpdateFlag, "no_parameter_set_update_flag"); WRITE_UVLC(sei.numSpsIdsMinus1, "num_sps_ids_minus1"); @@ -242,107 +240,198 @@ 
void SEIWriter::xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei) for (int i = 0; i < sei.activeSeqParameterSetId.size(); i++) { - WRITE_UVLC(sei.activeSeqParameterSetId[i], "active_seq_parameter_set_id"); + WRITE_CODE( sei.activeSeqParameterSetId[i], 4, "active_seq_parameter_set_id" ); } } +#endif -void SEIWriter::xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SPS *sps) +void SEIWriter::xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId) { - const VUI *vui = sps->getVuiParameters(); WRITE_UVLC(sei.m_decodingUnitIdx, "decoding_unit_idx"); - if(vui->getHrdParameters()->getSubPicCpbParamsInPicTimingSEIFlag()) + if( !bp.m_decodingUnitCpbParamsInPicTimingSeiFlag ) { - WRITE_CODE( sei.m_duSptCpbRemovalDelay, (vui->getHrdParameters()->getDuCpbRemovalDelayLengthMinus1() + 1), "du_spt_cpb_removal_delay_increment"); + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) + { + WRITE_FLAG( sei.m_duiSubLayerDelaysPresentFlag[i], "dui_sub_layer_delays_present_flag[i]" ); + if( sei.m_duiSubLayerDelaysPresentFlag[i] ) + WRITE_CODE( sei.m_duSptCpbRemovalDelayIncrement[i], bp.getDuCpbRemovalDelayIncrementLength(), "du_spt_cpb_removal_delay_increment[i]"); + } } WRITE_FLAG( sei.m_dpbOutputDuDelayPresentFlag, "dpb_output_du_delay_present_flag"); if(sei.m_dpbOutputDuDelayPresentFlag) { - WRITE_CODE(sei.m_picSptDpbOutputDuDelay, vui->getHrdParameters()->getDpbOutputDelayDuLengthMinus1() + 1, "pic_spt_dpb_output_du_delay"); + WRITE_CODE(sei.m_picSptDpbOutputDuDelay, bp.getDpbOutputDelayDuLength(), "pic_spt_dpb_output_du_delay"); } } -void SEIWriter::xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei, const SPS *sps) +void SEIWriter::xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei) { - int i, nalOrVcl; - const VUI *vui = sps->getVuiParameters(); - const HRD *hrd = vui->getHrdParameters(); + WRITE_FLAG( sei.m_bpNalCpbParamsPresentFlag, "bp_nal_hrd_parameters_present_flag"); + 
WRITE_FLAG( sei.m_bpVclCpbParamsPresentFlag, "bp_vcl_hrd_parameters_present_flag"); + CHECK(!sei.m_bpNalCpbParamsPresentFlag && !sei.m_bpVclCpbParamsPresentFlag, "bp_nal_hrd_parameters_present_flag and/or bp_vcl_hrd_parameters_present_flag must be true"); + CHECK (sei.m_initialCpbRemovalDelayLength < 1, "sei.m_initialCpbRemovalDelayLength must be > 0"); + WRITE_CODE( sei.m_initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1" ); + CHECK (sei.m_cpbRemovalDelayLength < 1, "sei.m_cpbRemovalDelayLength must be > 0"); + WRITE_CODE( sei.m_cpbRemovalDelayLength - 1, 5, "cpb_removal_delay_length_minus1" ); + CHECK (sei.m_dpbOutputDelayLength < 1, "sei.m_dpbOutputDelayLength must be > 0"); + WRITE_CODE( sei.m_dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1" ); + WRITE_FLAG(sei.m_altCpbParamsPresentFlag, "alt_cpb_params_present_flag"); + WRITE_FLAG( sei.m_bpDecodingUnitHrdParamsPresentFlag, "bp_decoding_unit_hrd_params_present_flag" ); + if( sei.m_bpDecodingUnitHrdParamsPresentFlag ) + { + CHECK (sei.m_duCpbRemovalDelayIncrementLength < 1, "sei.m_duCpbRemovalDelayIncrementLength must be > 0"); + WRITE_CODE( sei.m_duCpbRemovalDelayIncrementLength - 1, 5, "du_cpb_removal_delay_increment_length_minus1" ); + CHECK (sei.m_dpbOutputDelayDuLength < 1, "sei.m_dpbOutputDelayDuLength must be > 0"); + WRITE_CODE( sei.m_dpbOutputDelayDuLength - 1, 5, "dpb_output_delay_du_length_minus1" ); + WRITE_FLAG( sei.m_decodingUnitCpbParamsInPicTimingSeiFlag, "decoding_unit_cpb_params_in_pic_timing_sei_flag" ); + } - WRITE_UVLC( sei.m_bpSeqParameterSetId, "bp_seq_parameter_set_id" ); - if( !hrd->getSubPicCpbParamsPresentFlag() ) + WRITE_FLAG( sei.m_concatenationFlag, "concatenation_flag"); + WRITE_FLAG( sei.m_additionalConcatenationInfoPresentFlag, "additional_concatenation_info_present_flag"); + if (sei.m_additionalConcatenationInfoPresentFlag) { - WRITE_FLAG( sei.m_rapCpbParamsPresentFlag, "irap_cpb_params_present_flag" ); + WRITE_CODE( 
sei.m_maxInitialRemovalDelayForConcatenation, sei.m_initialCpbRemovalDelayLength, "max_initial_removal_delay_for_concatenation" ); } - if( sei.m_rapCpbParamsPresentFlag ) + + CHECK (sei.m_auCpbRemovalDelayDelta < 1, "sei.m_auCpbRemovalDelayDelta must be > 0"); + WRITE_CODE( sei.m_auCpbRemovalDelayDelta - 1, sei.m_cpbRemovalDelayLength, "au_cpb_removal_delay_delta_minus1" ); + + WRITE_FLAG( sei.m_cpbRemovalDelayDeltasPresentFlag, "cpb_removal_delay_deltas_present_flag"); + if (sei.m_cpbRemovalDelayDeltasPresentFlag) { - WRITE_CODE( sei.m_cpbDelayOffset, hrd->getCpbRemovalDelayLengthMinus1() + 1, "cpb_delay_offset" ); - WRITE_CODE( sei.m_dpbDelayOffset, hrd->getDpbOutputDelayLengthMinus1() + 1, "dpb_delay_offset" ); + CHECK (sei.m_numCpbRemovalDelayDeltas < 1, "m_numCpbRemovalDelayDeltas must be > 0"); + WRITE_UVLC( sei.m_numCpbRemovalDelayDeltas - 1, "num_cpb_removal_delay_deltas_minus1" ); + for( int i = 0; i < sei.m_numCpbRemovalDelayDeltas; i ++ ) + { + WRITE_CODE( sei.m_cpbRemovalDelayDelta[i], sei.m_cpbRemovalDelayLength, "cpb_removal_delay_delta[i]" ); + } + CHECK (sei.m_bpMaxSubLayers < 1, "bp_max_sub_layers_minus1 must be > 0"); + WRITE_CODE( sei.m_bpMaxSubLayers - 1, 3, "bp_max_sub_layers_minus1" ); } - WRITE_FLAG( sei.m_concatenationFlag, "concatenation_flag"); - WRITE_CODE( sei.m_auCpbRemovalDelayDelta - 1, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ), "au_cpb_removal_delay_delta_minus1" ); - for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) + CHECK (sei.m_bpCpbCnt < 1, "sei.m_bpCpbCnt must be > 0"); + WRITE_UVLC( sei.m_bpCpbCnt - 1, "bp_cpb_cnt_minus1"); + WRITE_FLAG(sei.m_sublayerInitialCpbRemovalDelayPresentFlag, "sublayer_initial_cpb_removal_delay_present_flag"); + for (int i = (sei.m_sublayerInitialCpbRemovalDelayPresentFlag ? 
0 : sei.m_bpMaxSubLayers - 1); i < sei.m_bpMaxSubLayers; i++) { - if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) || - ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) ) + for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) { - for( i = 0; i < ( hrd->getCpbCntMinus1( 0 ) + 1 ); i ++ ) + if( ( ( nalOrVcl == 0 ) && ( sei.m_bpNalCpbParamsPresentFlag ) ) || + ( ( nalOrVcl == 1 ) && ( sei.m_bpVclCpbParamsPresentFlag ) ) ) { - WRITE_CODE( sei.m_initialCpbRemovalDelay[i][nalOrVcl],( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , "initial_cpb_removal_delay" ); - WRITE_CODE( sei.m_initialCpbRemovalDelayOffset[i][nalOrVcl],( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ), "initial_cpb_removal_delay_offset" ); - if( hrd->getSubPicCpbParamsPresentFlag() || sei.m_rapCpbParamsPresentFlag ) + for( int j = 0; j < sei.m_bpCpbCnt; j ++ ) { - WRITE_CODE( sei.m_initialAltCpbRemovalDelay[i][nalOrVcl], ( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , "initial_alt_cpb_removal_delay" ); - WRITE_CODE( sei.m_initialAltCpbRemovalDelayOffset[i][nalOrVcl], ( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ),"initial_alt_cpb_removal_delay_offset" ); + WRITE_CODE( sei.m_initialCpbRemovalDelay[j][i][nalOrVcl], sei.m_initialCpbRemovalDelayLength, "initial_cpb_removal_delay[j][i][nalOrVcl]" ); + WRITE_CODE( sei.m_initialCpbRemovalOffset[j][i][nalOrVcl], sei.m_initialCpbRemovalDelayLength, "initial_cpb_removal_delay_offset[j][i][nalOrVcl]" ); } } } } + if (sei.m_altCpbParamsPresentFlag) + { + WRITE_FLAG(sei.m_useAltCpbParamsFlag, "use_alt_cpb_params_flag"); + } + } -void SEIWriter::xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SPS *sps) -{ - int i; - const VUI *vui = sps->getVuiParameters(); - const HRD *hrd = vui->getHrdParameters(); - if( vui->getFrameFieldInfoPresentFlag() ) +void SEIWriter::xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBufferingPeriod &bp, const uint32_t temporalId) +{ + + WRITE_CODE( 
sei.m_auCpbRemovalDelay[bp.m_bpMaxSubLayers - 1] - 1, bp.m_cpbRemovalDelayLength, "cpb_removal_delay_minus1[bp_max_sub_layers_minus1]" ); + if( bp.m_altCpbParamsPresentFlag ) { - WRITE_CODE( sei.m_picStruct, 4, "pic_struct" ); - WRITE_CODE( sei.m_sourceScanType, 2, "source_scan_type" ); - WRITE_FLAG( sei.m_duplicateFlag ? 1 : 0, "duplicate_flag" ); + WRITE_FLAG( sei.m_cpbAltTimingInfoPresentFlag, "cpb_alt_timing_info_present_flag" ); + if( sei.m_cpbAltTimingInfoPresentFlag ) + { + for( int i = 0; i < bp.m_bpCpbCnt; i++ ) + { + WRITE_CODE( sei.m_cpbAltInitialCpbRemovalDelayDelta[i], bp.m_initialCpbRemovalDelayLength, "cpb_alt_initial_cpb_removal_delay_delta[ i ]" ); + WRITE_CODE( sei.m_cpbAltInitialCpbRemovalOffsetDelta[i], bp.m_initialCpbRemovalDelayLength, "cpb_alt_initial_cpb_removal_offset_delta[ i ]" ); + } + WRITE_CODE( sei.m_cpbDelayOffset, bp.m_initialCpbRemovalDelayLength, "cpb_delay_offset" ); + WRITE_CODE( sei.m_dpbDelayOffset, bp.m_initialCpbRemovalDelayLength, "dpb_delay_offset" ); + } } - - if( hrd->getCpbDpbDelaysPresentFlag() ) + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) { - WRITE_CODE( sei.m_auCpbRemovalDelay - 1, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ), "au_cpb_removal_delay_minus1" ); - WRITE_CODE( sei.m_picDpbOutputDelay, ( hrd->getDpbOutputDelayLengthMinus1() + 1 ), "pic_dpb_output_delay" ); - if(hrd->getSubPicCpbParamsPresentFlag()) + WRITE_FLAG( sei.m_ptSubLayerDelaysPresentFlag[i], "pt_sub_layer_delays_present_flag[i]" ); + if( sei.m_ptSubLayerDelaysPresentFlag[i] ) { - WRITE_CODE(sei.m_picDpbOutputDuDelay, hrd->getDpbOutputDelayDuLengthMinus1()+1, "pic_dpb_output_du_delay" ); + if (bp.m_cpbRemovalDelayDeltasPresentFlag) + { + WRITE_FLAG(sei.m_cpbRemovalDelayDeltaEnabledFlag[i], "cpb_removal_delay_delta_enabled_flag[i]"); + } + if( sei.m_cpbRemovalDelayDeltaEnabledFlag[i] ) + { + WRITE_CODE( sei.m_cpbRemovalDelayDeltaIdx[i], ceilLog2(bp.m_numCpbRemovalDelayDeltas), "cpb_removal_delay_delta_idx[i]" ); + } + else + { + 
WRITE_CODE( sei.m_auCpbRemovalDelay[i] - 1, bp.m_cpbRemovalDelayLength, "cpb_removal_delay_minus1[i]" ); + } } - if( hrd->getSubPicCpbParamsPresentFlag() && hrd->getSubPicCpbParamsInPicTimingSEIFlag() ) + } + WRITE_CODE( sei.m_picDpbOutputDelay, bp.m_dpbOutputDelayLength, "dpb_output_delay" ); + if( bp.m_bpDecodingUnitHrdParamsPresentFlag ) + { + WRITE_CODE( sei.m_picDpbOutputDuDelay, bp.m_dpbOutputDelayDuLength, "pic_dpb_output_du_delay" ); + } + if( bp.m_bpDecodingUnitHrdParamsPresentFlag && bp.m_decodingUnitCpbParamsInPicTimingSeiFlag ) + { + WRITE_UVLC( sei.m_numDecodingUnitsMinus1, "num_decoding_units_minus1" ); + WRITE_FLAG( sei.m_duCommonCpbRemovalDelayFlag, "du_commmon_cpb_removal_delay_flag" ); + if( sei.m_duCommonCpbRemovalDelayFlag ) { - WRITE_UVLC( sei.m_numDecodingUnitsMinus1, "num_decoding_units_minus1" ); - WRITE_FLAG( sei.m_duCommonCpbRemovalDelayFlag, "du_common_cpb_removal_delay_flag" ); - if( sei.m_duCommonCpbRemovalDelayFlag ) + for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) { - WRITE_CODE( sei.m_duCommonCpbRemovalDelayMinus1, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), "du_common_cpb_removal_delay_minus1" ); + if( sei.m_ptSubLayerDelaysPresentFlag[i] ) + WRITE_CODE( sei.m_duCommonCpbRemovalDelayMinus1[i], bp.m_duCpbRemovalDelayIncrementLength, "du_common_cpb_removal_delay_increment_minus1[i]" ); } - for( i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ ) + } + for( int i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ ) + { + WRITE_UVLC( sei.m_numNalusInDuMinus1[i], "num_nalus_in_du_minus1[i]" ); + if( !sei.m_duCommonCpbRemovalDelayFlag && i < sei.m_numDecodingUnitsMinus1 ) { - WRITE_UVLC( sei.m_numNalusInDuMinus1[ i ], "num_nalus_in_du_minus1"); - if( ( !sei.m_duCommonCpbRemovalDelayFlag ) && ( i < sei.m_numDecodingUnitsMinus1 ) ) + for( int j = temporalId; j < bp.m_bpMaxSubLayers - 1; j ++ ) { - WRITE_CODE( sei.m_duCpbRemovalDelayMinus1[ i ], ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), "du_cpb_removal_delay_minus1" ); + if( 
sei.m_ptSubLayerDelaysPresentFlag[j] ) + WRITE_CODE( sei.m_duCpbRemovalDelayMinus1[i * bp.m_bpMaxSubLayers + j], bp.m_duCpbRemovalDelayIncrementLength, "du_cpb_removal_delay_increment_minus1[i][j]" ); } } } } } -void SEIWriter::xWriteSEIRecoveryPoint(const SEIRecoveryPoint& sei) + +void SEIWriter::xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei) { - WRITE_SVLC( sei.m_recoveryPocCnt, "recovery_poc_cnt" ); - WRITE_FLAG( sei.m_exactMatchingFlag, "exact_matching_flag" ); - WRITE_FLAG( sei.m_brokenLinkFlag, "broken_link_flag" ); + WRITE_FLAG( sei.m_fieldPicFlag ? 1 : 0, "field_pic_flag" ); + if (sei.m_fieldPicFlag) + { + WRITE_FLAG( sei.m_bottomFieldFlag ? 1 : 0, "bottom_field_flag" ); + WRITE_FLAG( sei.m_pairingIndicatedFlag ? 1 : 0, "pairing_indicated_flag" ); + if (sei.m_pairingIndicatedFlag) + { + WRITE_FLAG( sei.m_pairedWithNextFieldFlag ? 1 : 0, "paired_with_next_field_flag" ); + } + } + else + { + WRITE_FLAG( sei.m_displayFieldsFromFrameFlag ? 1 : 0, "display_fields_from_frame_flag" ); + if (sei.m_displayFieldsFromFrameFlag) + { + WRITE_FLAG( sei.m_topFieldFirstFlag ? 1 : 0, "display_fields_from_frame_flag" ); + } + WRITE_UVLC( sei.m_displayElementalPeriodsMinus1, "display_elemental_periods_minus1" ); + } + WRITE_CODE( sei.m_sourceScanType, 2, "source_scan_type" ); + WRITE_FLAG( sei.m_duplicateFlag ? 
1 : 0, "duplicate_flag" ); } + +void SEIWriter::xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication& /*sei*/) +{ + // intentionally empty +} + void SEIWriter::xWriteSEIFramePacking(const SEIFramePacking& sei) { WRITE_UVLC( sei.m_arrangementId, "frame_packing_arrangement_id" ); @@ -377,458 +466,322 @@ void SEIWriter::xWriteSEIFramePacking(const SEIFramePacking& sei) WRITE_FLAG( sei.m_upsampledAspectRatio, "upsampled_aspect_ratio" ); } -void SEIWriter::xWriteSEISegmentedRectFramePacking(const SEISegmentedRectFramePacking& sei) -{ - WRITE_FLAG( sei.m_arrangementCancelFlag, "segmented_rect_frame_packing_arrangement_cancel_flag" ); - if( sei.m_arrangementCancelFlag == 0 ) - { - WRITE_CODE( sei.m_contentInterpretationType, 2, "segmented_rect_content_interpretation_type" ); - WRITE_FLAG( sei.m_arrangementPersistenceFlag, "segmented_rect_frame_packing_arrangement_persistence" ); - } -} -void SEIWriter::xWriteSEIToneMappingInfo(const SEIToneMappingInfo& sei) +void SEIWriter::xWriteSEIMasteringDisplayColourVolume(const SEIMasteringDisplayColourVolume& sei) { - int i; - WRITE_UVLC( sei.m_toneMapId, "tone_map_id" ); - WRITE_FLAG( sei.m_toneMapCancelFlag, "tone_map_cancel_flag" ); - if( !sei.m_toneMapCancelFlag ) - { - WRITE_FLAG( sei.m_toneMapPersistenceFlag, "tone_map_persistence_flag" ); - WRITE_CODE( sei.m_codedDataBitDepth, 8, "coded_data_bit_depth" ); - WRITE_CODE( sei.m_targetBitDepth, 8, "target_bit_depth" ); - WRITE_UVLC( sei.m_modelId, "model_id" ); - switch(sei.m_modelId) - { - case 0: - { - WRITE_CODE( sei.m_minValue, 32, "min_value" ); - WRITE_CODE( sei.m_maxValue, 32, "max_value" ); - break; - } - case 1: - { - WRITE_CODE( sei.m_sigmoidMidpoint, 32, "sigmoid_midpoint" ); - WRITE_CODE( sei.m_sigmoidWidth, 32, "sigmoid_width" ); - break; - } - case 2: - { - uint32_t num = 1u << sei.m_targetBitDepth; - for(i = 0; i < num; i++) - { - WRITE_CODE( sei.m_startOfCodedInterval[i], (( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3, "start_of_coded_interval" ); - } - 
break; - } - case 3: - { - WRITE_CODE( sei.m_numPivots, 16, "num_pivots" ); - for(i = 0; i < sei.m_numPivots; i++ ) - { - WRITE_CODE( sei.m_codedPivotValue[i], (( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3, "coded_pivot_value" ); - WRITE_CODE( sei.m_targetPivotValue[i], (( sei.m_targetBitDepth + 7 ) >> 3 ) << 3, "target_pivot_value"); - } - break; - } - case 4: - { - WRITE_CODE( sei.m_cameraIsoSpeedIdc, 8, "camera_iso_speed_idc" ); - if( sei.m_cameraIsoSpeedIdc == 255) //Extended_ISO - { - WRITE_CODE( sei.m_cameraIsoSpeedValue, 32, "camera_iso_speed_value" ); - } - WRITE_CODE( sei.m_exposureIndexIdc, 8, "exposure_index_idc" ); - if( sei.m_exposureIndexIdc == 255) //Extended_ISO - { - WRITE_CODE( sei.m_exposureIndexValue, 32, "exposure_index_value" ); - } - WRITE_FLAG( sei.m_exposureCompensationValueSignFlag, "exposure_compensation_value_sign_flag" ); - WRITE_CODE( sei.m_exposureCompensationValueNumerator, 16, "exposure_compensation_value_numerator" ); - WRITE_CODE( sei.m_exposureCompensationValueDenomIdc, 16, "exposure_compensation_value_denom_idc" ); - WRITE_CODE( sei.m_refScreenLuminanceWhite, 32, "ref_screen_luminance_white" ); - WRITE_CODE( sei.m_extendedRangeWhiteLevel, 32, "extended_range_white_level" ); - WRITE_CODE( sei.m_nominalBlackLevelLumaCodeValue, 16, "nominal_black_level_luma_code_value" ); - WRITE_CODE( sei.m_nominalWhiteLevelLumaCodeValue, 16, "nominal_white_level_luma_code_value" ); - WRITE_CODE( sei.m_extendedWhiteLevelLumaCodeValue, 16, "extended_white_level_luma_code_value" ); - break; - } - default: - { - THROW("Undefined SEIToneMapModelId"); - break; - } - }//switch m_modelId - }//if(!sei.m_toneMapCancelFlag) + WRITE_CODE( sei.values.primaries[0][0], 16, "display_primaries_x[0]" ); + WRITE_CODE( sei.values.primaries[0][1], 16, "display_primaries_y[0]" ); + + WRITE_CODE( sei.values.primaries[1][0], 16, "display_primaries_x[1]" ); + WRITE_CODE( sei.values.primaries[1][1], 16, "display_primaries_y[1]" ); + + WRITE_CODE( 
sei.values.primaries[2][0], 16, "display_primaries_x[2]" ); + WRITE_CODE( sei.values.primaries[2][1], 16, "display_primaries_y[2]" ); + + WRITE_CODE( sei.values.whitePoint[0], 16, "white_point_x" ); + WRITE_CODE( sei.values.whitePoint[1], 16, "white_point_y" ); + + WRITE_CODE( sei.values.maxLuminance, 32, "max_display_mastering_luminance" ); + WRITE_CODE( sei.values.minLuminance, 32, "min_display_mastering_luminance" ); } -void SEIWriter::xWriteSEIDisplayOrientation(const SEIDisplayOrientation &sei) +void SEIWriter::xWriteByteAlign() { - WRITE_FLAG( sei.cancelFlag, "display_orientation_cancel_flag" ); - if( !sei.cancelFlag ) + if( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0) { - WRITE_FLAG( sei.horFlip, "hor_flip" ); - WRITE_FLAG( sei.verFlip, "ver_flip" ); - WRITE_CODE( sei.anticlockwiseRotation, 16, "anticlockwise_rotation" ); - WRITE_FLAG( sei.persistenceFlag, "display_orientation_persistence_flag" ); + WRITE_FLAG( 1, "payload_bit_equal_to_one" ); + while( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 ) + { + WRITE_FLAG( 0, "payload_bit_equal_to_zero" ); + } } } -void SEIWriter::xWriteSEITemporalLevel0Index(const SEITemporalLevel0Index &sei) +#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI +void SEIWriter::xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei) { - WRITE_CODE( sei.tl0Idx, 8 , "tl0_idx" ); - WRITE_CODE( sei.rapIdx, 8 , "rap_idx" ); + WRITE_CODE(sei.m_preferredTransferCharacteristics, 8, "preferred_transfer_characteristics"); } +#endif -void SEIWriter::xWriteSEIGradualDecodingRefreshInfo(const SEIGradualDecodingRefreshInfo &sei) +void SEIWriter::xWriteSEIEquirectangularProjection(const SEIEquirectangularProjection &sei) { - WRITE_FLAG( sei.m_gdrForegroundFlag, "gdr_foreground_flag"); + WRITE_FLAG( sei.m_erpCancelFlag, "erp_cancel_flag" ); + if( !sei.m_erpCancelFlag ) + { + WRITE_FLAG( sei.m_erpPersistenceFlag, "erp_persistence_flag" ); + WRITE_FLAG( sei.m_erpGuardBandFlag, "erp_guard_band_flag" ); + 
WRITE_CODE( 0, 2, "erp_reserved_zero_2bits" ); + if ( sei.m_erpGuardBandFlag == 1) + { + WRITE_CODE( sei.m_erpGuardBandType, 3, "erp_guard_band_type" ); + WRITE_CODE( sei.m_erpLeftGuardBandWidth, 8, "erp_left_guard_band_width" ); + WRITE_CODE( sei.m_erpRightGuardBandWidth, 8, "erp_right_guard_band_width" ); + } + } } -void SEIWriter::xWriteSEINoDisplay(const SEINoDisplay& /*sei*/) +void SEIWriter::xWriteSEISphereRotation(const SEISphereRotation &sei) { + WRITE_FLAG( sei.m_sphereRotationCancelFlag, "sphere_rotation_cancel_flag" ); + if( !sei.m_sphereRotationCancelFlag ) + { + WRITE_FLAG( sei.m_sphereRotationPersistenceFlag, "sphere_rotation_persistence_flag" ); + WRITE_CODE( 0, 6, "sphere_rotation_reserved_zero_6bits" ); + WRITE_SCODE(sei.m_sphereRotationYaw, 32, "sphere_rotation_yaw" ); + WRITE_SCODE(sei.m_sphereRotationPitch, 32, "sphere_rotation_pitch" ); + WRITE_SCODE(sei.m_sphereRotationRoll, 32, "sphere_rotation_roll" ); + } } -void SEIWriter::xWriteSEISOPDescription(const SEISOPDescription& sei) +void SEIWriter::xWriteSEIOmniViewport(const SEIOmniViewport &sei) { - WRITE_UVLC( sei.m_sopSeqParameterSetId, "sop_seq_parameter_set_id" ); - WRITE_UVLC( sei.m_numPicsInSopMinus1, "num_pics_in_sop_minus1" ); - for (uint32_t i = 0; i <= sei.m_numPicsInSopMinus1; i++) + WRITE_CODE( sei.m_omniViewportId, 10, "omni_viewport_id" ); + WRITE_FLAG( sei.m_omniViewportCancelFlag, "omni_viewport_cancel_flag" ); + if ( !sei.m_omniViewportCancelFlag ) { - WRITE_CODE( sei.m_sopDescVclNaluType[i], 6, "sop_desc_vcl_nalu_type" ); - WRITE_CODE( sei.m_sopDescTemporalId[i], 3, "sop_desc_temporal_id" ); - if (sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_W_RADL && sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_N_LP) - { - WRITE_UVLC( sei.m_sopDescStRpsIdx[i], "sop_desc_st_rps_idx" ); - } - if (i > 0) + WRITE_FLAG( sei.m_omniViewportPersistenceFlag, "omni_viewport_persistence_flag" ); + const uint32_t numRegions = (uint32_t) sei.m_omniViewportRegions.size(); + 
WRITE_CODE( numRegions - 1, 4, "omni_viewport_cnt_minus1" ); + for(uint32_t region=0; region<numRegions; region++) { - WRITE_SVLC( sei.m_sopDescPocDelta[i], "sop_desc_poc_delta" ); + const SEIOmniViewport::OmniViewport &viewport=sei.m_omniViewportRegions[region]; + WRITE_SCODE( viewport.azimuthCentre, 32, "omni_viewport_azimuth_centre" ); + WRITE_SCODE( viewport.elevationCentre, 32, "omni_viewport_elevation_centre" ); + WRITE_SCODE( viewport.tiltCentre, 32, "omni_viewport_tilt_center" ); + WRITE_CODE( viewport.horRange, 32, "omni_viewport_hor_range[i]" ); + WRITE_CODE( viewport.verRange, 32, "omni_viewport_ver_range[i]" ); } } } -void SEIWriter::xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei, const SPS *sps) +void SEIWriter::xWriteSEIRegionWisePacking(const SEIRegionWisePacking &sei) { - WRITE_FLAG( sei.m_bitStreamSubsetFlag, "bitstream_subset_flag" ); - WRITE_FLAG( sei.m_nestingOpFlag, "nesting_op_flag " ); - if (sei.m_nestingOpFlag) - { - WRITE_FLAG( sei.m_defaultOpFlag, "default_op_flag" ); - WRITE_UVLC( sei.m_nestingNumOpsMinus1, "nesting_num_ops_minus1" ); - for (uint32_t i = (sei.m_defaultOpFlag ? 
1 : 0); i <= sei.m_nestingNumOpsMinus1; i++) - { - WRITE_CODE( sei.m_nestingMaxTemporalIdPlus1[i], 3, "nesting_max_temporal_id_plus1" ); - WRITE_UVLC( sei.m_nestingOpIdx[i], "nesting_op_idx" ); - } - } - else + WRITE_FLAG( sei.m_rwpCancelFlag, "rwp_cancel_flag" ); + if(!sei.m_rwpCancelFlag) { - WRITE_FLAG( sei.m_allLayersFlag, "all_layers_flag" ); - if (!sei.m_allLayersFlag) - { - WRITE_CODE( sei.m_nestingNoOpMaxTemporalIdPlus1, 3, "nesting_no_op_max_temporal_id_plus1" ); - WRITE_UVLC( sei.m_nestingNumLayersMinus1, "nesting_num_layers" ); - for (uint32_t i = 0; i <= sei.m_nestingNumLayersMinus1; i++) + WRITE_FLAG( sei.m_rwpPersistenceFlag, "rwp_persistence_flag" ); + WRITE_FLAG( sei.m_constituentPictureMatchingFlag, "constituent_picture_matching_flag" ); + WRITE_CODE( 0, 5, "rwp_reserved_zero_5bits" ); + WRITE_CODE( (uint32_t)sei.m_numPackedRegions, 8, "num_packed_regions" ); + WRITE_CODE( (uint32_t)sei.m_projPictureWidth, 32, "proj_picture_width" ); + WRITE_CODE( (uint32_t)sei.m_projPictureHeight, 32, "proj_picture_height" ); + WRITE_CODE( (uint32_t)sei.m_packedPictureWidth, 16, "packed_picture_width" ); + WRITE_CODE( (uint32_t)sei.m_packedPictureHeight, 16, "packed_picture_height" ); + for( int i=0; i < sei.m_numPackedRegions; i++ ) + { + WRITE_CODE( 0, 4, "rwp_reserved_zero_4bits" ); + WRITE_CODE( (uint32_t)sei.m_rwpTransformType[i], 3, "rwp_tTransform_type" ); + WRITE_FLAG( sei.m_rwpGuardBandFlag[i], "rwp_guard_band_flag" ); + WRITE_CODE( (uint32_t)sei.m_projRegionWidth[i], 32, "proj_region_width" ); + WRITE_CODE( (uint32_t)sei.m_projRegionHeight[i], 32, "proj_region_height" ); + WRITE_CODE( (uint32_t)sei.m_rwpProjRegionTop[i], 32, "rwp_proj_regionTop" ); + WRITE_CODE( (uint32_t)sei.m_projRegionLeft[i], 32, "proj_region_left" ); + WRITE_CODE( (uint32_t)sei.m_packedRegionWidth[i], 16, "packed_region_width" ); + WRITE_CODE( (uint32_t)sei.m_packedRegionHeight[i], 16, "packed_region_height" ); + WRITE_CODE( (uint32_t)sei.m_packedRegionTop[i], 16, 
"packed_region_top" ); + WRITE_CODE( (uint32_t)sei.m_packedRegionLeft[i], 16, "packed_region_left" ); + if( sei.m_rwpGuardBandFlag[i] ) { - WRITE_CODE( sei.m_nestingLayerId[i], 6, "nesting_layer_id" ); + WRITE_CODE( (uint32_t)sei.m_rwpLeftGuardBandWidth[i], 8, "rwp_left_guard_band_width"); + WRITE_CODE( (uint32_t)sei.m_rwpRightGuardBandWidth[i], 8, "rwp_right_guard_band_width"); + WRITE_CODE( (uint32_t)sei.m_rwpTopGuardBandHeight[i], 8, "rwp_top_guard_band_height"); + WRITE_CODE( (uint32_t)sei. m_rwpBottomGuardBandHeight[i], 8, "rwp_bottom_guard_band_height"); + WRITE_FLAG( sei.m_rwpGuardBandNotUsedForPredFlag[i], "rwp_guard_band_not_used_forPred_flag" ); + for( int j=0; j < 4; j++ ) + { + WRITE_CODE( (uint32_t)sei.m_rwpGuardBandType[i*4 + j], 3, "rwp_guard_band_type"); + } + WRITE_CODE( 0, 3, "rwp_guard_band_reserved_zero_3bits" ); } } } - - // byte alignment - while ( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 ) - { - WRITE_FLAG( 0, "nesting_zero_bit" ); - } - - // write nested SEI messages - writeSEImessages(bs, sei.m_nestedSEIs, sps, true); } -#if HEVC_TILES_WPP -void SEIWriter::xWriteSEITempMotionConstrainedTileSets(const SEITempMotionConstrainedTileSets& sei) +void SEIWriter::xWriteSEIGeneralizedCubemapProjection(const SEIGeneralizedCubemapProjection &sei) { - //uint32_t code; - WRITE_FLAG((sei.m_mc_all_tiles_exact_sample_value_match_flag ? 1 : 0), "mc_all_tiles_exact_sample_value_match_flag"); - WRITE_FLAG((sei.m_each_tile_one_tile_set_flag ? 1 : 0), "each_tile_one_tile_set_flag" ); - - if(!sei.m_each_tile_one_tile_set_flag) + WRITE_FLAG( sei.m_gcmpCancelFlag, "gcmp_cancel_flag" ); + if (!sei.m_gcmpCancelFlag) { - WRITE_FLAG((sei.m_limited_tile_set_display_flag ? 
1 : 0), "limited_tile_set_display_flag"); - WRITE_UVLC((sei.getNumberOfTileSets() - 1), "num_sets_in_message_minus1" ); - - if(sei.getNumberOfTileSets() > 0) + WRITE_FLAG( sei.m_gcmpPersistenceFlag, "gcmp_persistence_flag" ); + WRITE_CODE( sei.m_gcmpPackingType, 3, "gcmp_packing_type" ); + WRITE_CODE( sei.m_gcmpMappingFunctionType, 2, "gcmp_mapping_function_type" ); + int numFace = sei.m_gcmpPackingType == 4 || sei.m_gcmpPackingType == 5 ? 5 : 6; + for (int i = 0; i < numFace; i++) { - for(int i = 0; i < sei.getNumberOfTileSets(); i++) + WRITE_CODE( sei.m_gcmpFaceIndex[i], 3, "gcmp_face_index" ); + WRITE_CODE( sei.m_gcmpFaceRotation[i], 2, "gcmp_face_rotation" ); + if (sei.m_gcmpMappingFunctionType == 2) { - WRITE_UVLC(sei.tileSetData(i).m_mcts_id, "mcts_id"); - - if(sei.m_limited_tile_set_display_flag) - { - WRITE_FLAG((sei.tileSetData(i).m_display_tile_set_flag ? 1 : 0), "display_tile_set_flag"); - } - - WRITE_UVLC((sei.tileSetData(i).getNumberOfTileRects() - 1), "num_tile_rects_in_set_minus1"); - - for(int j = 0; j < sei.tileSetData(i).getNumberOfTileRects(); j++) - { - WRITE_UVLC(sei.tileSetData(i).topLeftTileIndex (j), "top_left_tile_index"); - WRITE_UVLC(sei.tileSetData(i).bottomRightTileIndex(j), "bottom_right_tile_index"); - } - - if(!sei.m_mc_all_tiles_exact_sample_value_match_flag) - { - WRITE_FLAG((sei.tileSetData(i).m_exact_sample_value_match_flag ? 1 : 0), "exact_sample_value_match_flag"); - } - - WRITE_FLAG((sei.tileSetData(i).m_mcts_tier_level_idc_present_flag ? 1 : 0), "mcts_tier_level_idc_present_flag"); - - if(sei.tileSetData(i).m_mcts_tier_level_idc_present_flag) - { - WRITE_FLAG((sei.tileSetData(i).m_mcts_tier_flag ? 
1 : 0), "mcts_tier_flag"); - WRITE_CODE( sei.tileSetData(i).m_mcts_level_idc, 8, "mcts_level_idc"); - } + WRITE_CODE( sei.m_gcmpFunctionCoeffU[i], 7, "gcmp_function_coeff_u" ); + WRITE_FLAG( sei.m_gcmpFunctionUAffectedByVFlag[i], "gcmp_function_u_affected_by_v_flag" ); + WRITE_CODE( sei.m_gcmpFunctionCoeffV[i], 7, "gcmp_function_coeff_v" ); + WRITE_FLAG( sei.m_gcmpFunctionVAffectedByUFlag[i], "gcmp_function_v_affected_by_u_flag" ); } } - } - else - { - WRITE_FLAG((sei.m_max_mcs_tier_level_idc_present_flag ? 1 : 0), "max_mcs_tier_level_idc_present_flag"); - - if(sei.m_max_mcs_tier_level_idc_present_flag) + WRITE_FLAG( sei.m_gcmpGuardBandFlag, "gcmp_guard_band_flag" ); + if (sei.m_gcmpGuardBandFlag) { - WRITE_FLAG((sei.m_max_mcts_tier_flag ? 1 : 0), "max_mcts_tier_flag"); - WRITE_CODE( sei.m_max_mcts_level_idc, 8, "max_mcts_level_idc"); + WRITE_FLAG( sei.m_gcmpGuardBandBoundaryType, "gcmp_guard_band_boundary_type" ); + WRITE_CODE( sei.m_gcmpGuardBandSamplesMinus1, 4, "gcmp_guard_band_samples_minus1" ); } } } -#endif -void SEIWriter::xWriteSEITimeCode(const SEITimeCode& sei) +void SEIWriter::xWriteSEISubpictureLevelInfo(const SEISubpicureLevelInfo &sei, const SPS* sps) { - WRITE_CODE(sei.numClockTs, 2, "num_clock_ts"); - for(int i = 0; i < sei.numClockTs; i++) + WRITE_CODE( (uint32_t)sei.m_sliSeqParameterSetId, 4, "sli_seq_parameter_set_id"); + CHECK(sei.m_numRefLevels < 1, "SEISubpicureLevelInfo: numRefLevels must be greater than zero"); + CHECK(sei.m_numRefLevels != (int)sei.m_refLevelIdc.size(), "SEISubpicureLevelInfo: numRefLevels must be equal to the number of levels"); + if (sei.m_explicitFractionPresentFlag) + { + CHECK(sei.m_numRefLevels != (int)sei.m_refLevelFraction.size(), "SEISubpicureLevelInfo: numRefLevels must be equal to the number of fractions"); + } + WRITE_CODE( (uint32_t)sei.m_numRefLevels - 1, 3, "num_ref_levels_minus1"); + WRITE_FLAG( sei.m_explicitFractionPresentFlag, "explicit_fraction_present_flag"); + + for (int i=0; i<sei.m_numRefLevels; 
i++) { - const SEITimeSet ¤tTimeSet = sei.timeSetArray[i]; - WRITE_FLAG(currentTimeSet.clockTimeStampFlag, "clock_time_stamp_flag"); - if(currentTimeSet.clockTimeStampFlag) + WRITE_CODE( (uint32_t)sei.m_refLevelIdc[i], 8, "ref_level_idc[i]"); + if (sei.m_explicitFractionPresentFlag) { - WRITE_FLAG(currentTimeSet.numUnitFieldBasedFlag, "units_field_based_flag"); - WRITE_CODE(currentTimeSet.countingType, 5, "counting_type"); - WRITE_FLAG(currentTimeSet.fullTimeStampFlag, "full_timestamp_flag"); - WRITE_FLAG(currentTimeSet.discontinuityFlag, "discontinuity_flag"); - WRITE_FLAG(currentTimeSet.cntDroppedFlag, "cnt_dropped_flag"); - WRITE_CODE(currentTimeSet.numberOfFrames, 9, "n_frames"); - if(currentTimeSet.fullTimeStampFlag) - { - WRITE_CODE(currentTimeSet.secondsValue, 6, "seconds_value"); - WRITE_CODE(currentTimeSet.minutesValue, 6, "minutes_value"); - WRITE_CODE(currentTimeSet.hoursValue, 5, "hours_value"); - } - else + CHECK(sps->getNumSubPics() != (int)sei.m_refLevelFraction[i].size(), "SEISubpicureLevelInfo: number of fractions differs from number of subpictures"); + for (int j = 0; j < sps->getNumSubPics(); j++) { - WRITE_FLAG(currentTimeSet.secondsFlag, "seconds_flag"); - if(currentTimeSet.secondsFlag) - { - WRITE_CODE(currentTimeSet.secondsValue, 6, "seconds_value"); - WRITE_FLAG(currentTimeSet.minutesFlag, "minutes_flag"); - if(currentTimeSet.minutesFlag) - { - WRITE_CODE(currentTimeSet.minutesValue, 6, "minutes_value"); - WRITE_FLAG(currentTimeSet.hoursFlag, "hours_flag"); - if(currentTimeSet.hoursFlag) - { - WRITE_CODE(currentTimeSet.hoursValue, 5, "hours_value"); - } - } - } - } - WRITE_CODE(currentTimeSet.timeOffsetLength, 5, "time_offset_length"); - if(currentTimeSet.timeOffsetLength > 0) - { - if(currentTimeSet.timeOffsetValue >= 0) - { - WRITE_CODE((uint32_t)currentTimeSet.timeOffsetValue, currentTimeSet.timeOffsetLength, "time_offset_value"); - } - else - { - // Two's complement conversion - uint32_t offsetValue = ~(currentTimeSet.timeOffsetValue) + 
1; - offsetValue |= (1 << (currentTimeSet.timeOffsetLength-1)); - WRITE_CODE(offsetValue, currentTimeSet.timeOffsetLength, "time_offset_value"); - } + WRITE_CODE( (uint32_t)sei.m_refLevelFraction[i][j], 8, "ref_level_fraction_minus1[i][j]"); } } } } -void SEIWriter::xWriteSEIChromaResamplingFilterHint(const SEIChromaResamplingFilterHint &sei) +void SEIWriter::xWriteSEISampleAspectRatioInfo(const SEISampleAspectRatioInfo &sei) { - WRITE_CODE(sei.m_verChromaFilterIdc, 8, "ver_chroma_filter_idc"); - WRITE_CODE(sei.m_horChromaFilterIdc, 8, "hor_chroma_filter_idc"); - WRITE_FLAG(sei.m_verFilteringFieldProcessingFlag, "ver_filtering_field_processing_flag"); - if(sei.m_verChromaFilterIdc == 1 || sei.m_horChromaFilterIdc == 1) + WRITE_FLAG( sei.m_sariCancelFlag, "sari_cancel_flag" ); + if(!sei.m_sariCancelFlag) { - WRITE_UVLC(sei.m_targetFormatIdc, "target_format_idc"); - if(sei.m_verChromaFilterIdc == 1) - { - const int numVerticalFilter = (int)sei.m_verFilterCoeff.size(); - WRITE_UVLC(numVerticalFilter, "num_vertical_filters"); - if(numVerticalFilter > 0) - { - for(int i = 0; i < numVerticalFilter; i ++) - { - const int verTapLengthMinus1 = (int) sei.m_verFilterCoeff[i].size() - 1; - WRITE_UVLC(verTapLengthMinus1, "ver_tap_length_minus_1"); - for(int j = 0; j < (verTapLengthMinus1 + 1); j ++) - { - WRITE_SVLC(sei.m_verFilterCoeff[i][j], "ver_filter_coeff"); - } - } - } - } - if(sei.m_horChromaFilterIdc == 1) + WRITE_FLAG( sei.m_sariPersistenceFlag, "sari_persistence_flag" ); + WRITE_CODE( (uint32_t)sei.m_sariAspectRatioIdc, 8, "sari_aspect_ratio_idc"); + if (sei.m_sariAspectRatioIdc == 255) { - const int numHorizontalFilter = (int) sei.m_horFilterCoeff.size(); - WRITE_UVLC(numHorizontalFilter, "num_horizontal_filters"); - if(numHorizontalFilter > 0) - { - for(int i = 0; i < numHorizontalFilter; i ++) - { - const int horTapLengthMinus1 = (int) sei.m_horFilterCoeff[i].size() - 1; - WRITE_UVLC(horTapLengthMinus1, "hor_tap_length_minus_1"); - for(int j = 0; j < 
(horTapLengthMinus1 + 1); j ++) - { - WRITE_SVLC(sei.m_horFilterCoeff[i][j], "hor_filter_coeff"); - } - } - } + WRITE_CODE( (uint32_t)sei.m_sariSarWidth, 16, "sari_sar_width"); + WRITE_CODE( (uint32_t)sei.m_sariSarHeight, 16, "sari_sar_height"); } } } -void SEIWriter::xWriteSEIKneeFunctionInfo(const SEIKneeFunctionInfo &sei) +void SEIWriter::xWriteSEIUserDataRegistered(const SEIUserDataRegistered &sei) { - WRITE_UVLC( sei.m_kneeId, "knee_function_id" ); - WRITE_FLAG( sei.m_kneeCancelFlag, "knee_function_cancel_flag" ); - if ( !sei.m_kneeCancelFlag ) - { - WRITE_FLAG( sei.m_kneePersistenceFlag, "knee_function_persistence_flag" ); - WRITE_CODE( (uint32_t)sei.m_kneeInputDrange , 32, "input_d_range" ); - WRITE_CODE( (uint32_t)sei.m_kneeInputDispLuminance, 32, "input_disp_luminance" ); - WRITE_CODE( (uint32_t)sei.m_kneeOutputDrange, 32, "output_d_range" ); - WRITE_CODE( (uint32_t)sei.m_kneeOutputDispLuminance, 32, "output_disp_luminance" ); - WRITE_UVLC( sei.m_kneeNumKneePointsMinus1, "num_knee_points_minus1" ); - for(int i = 0; i <= sei.m_kneeNumKneePointsMinus1; i++ ) - { - WRITE_CODE( (uint32_t)sei.m_kneeInputKneePoint[i], 10,"input_knee_point" ); - WRITE_CODE( (uint32_t)sei.m_kneeOutputKneePoint[i], 10, "output_knee_point" ); - } + WRITE_CODE((sei.m_ituCountryCode>255) ? 
0xff : sei.m_ituCountryCode, 8, "itu_t_t35_country_code"); + if (sei.m_ituCountryCode >= 255) + { + assert(sei.m_ituCountryCode < 255 + 256); + WRITE_CODE(sei.m_ituCountryCode - 255, 8, "itu_t_t35_country_code_extension_byte"); + } + for (uint32_t i = 0; i<sei.m_userData.size(); i++) + { + WRITE_CODE(sei.m_userData[i], 8, "itu_t_t35_payload_byte"); } } -void SEIWriter::xWriteSEIColourRemappingInfo(const SEIColourRemappingInfo& sei) +void SEIWriter::xWriteSEIFilmGrainCharacteristics(const SEIFilmGrainCharacteristics &sei) { - WRITE_UVLC( sei.m_colourRemapId, "colour_remap_id" ); - WRITE_FLAG( sei.m_colourRemapCancelFlag, "colour_remap_cancel_flag" ); - if( !sei.m_colourRemapCancelFlag ) + WRITE_FLAG(sei.m_filmGrainCharacteristicsCancelFlag, "film_grain_characteristics_cancel_flag"); + if (!sei.m_filmGrainCharacteristicsCancelFlag) { - WRITE_FLAG( sei.m_colourRemapPersistenceFlag, "colour_remap_persistence_flag" ); - WRITE_FLAG( sei.m_colourRemapVideoSignalInfoPresentFlag, "colour_remap_video_signal_info_present_flag" ); - if ( sei.m_colourRemapVideoSignalInfoPresentFlag ) - { - WRITE_FLAG( sei.m_colourRemapFullRangeFlag, "colour_remap_full_range_flag" ); - WRITE_CODE( sei.m_colourRemapPrimaries, 8, "colour_remap_primaries" ); - WRITE_CODE( sei.m_colourRemapTransferFunction, 8, "colour_remap_transfer_function" ); - WRITE_CODE( sei.m_colourRemapMatrixCoefficients, 8, "colour_remap_matrix_coefficients" ); - } - WRITE_CODE( sei.m_colourRemapInputBitDepth, 8, "colour_remap_input_bit_depth" ); - WRITE_CODE( sei.m_colourRemapBitDepth, 8, "colour_remap_bit_depth" ); - for( int c=0 ; c<3 ; c++ ) + WRITE_CODE(sei.m_filmGrainModelId, 2, "film_grain_model_id"); + WRITE_FLAG(sei.m_separateColourDescriptionPresentFlag, "separate_colour_description_present_flag"); + if (sei.m_separateColourDescriptionPresentFlag) { - WRITE_CODE( sei.m_preLutNumValMinus1[c], 8, "pre_lut_num_val_minus1[c]" ); - if( sei.m_preLutNumValMinus1[c]>0 ) - { - for( int i=0 ; i<=sei.m_preLutNumValMinus1[c] ; 
i++ ) - { - WRITE_CODE( sei.m_preLut[c][i].codedValue, (( sei.m_colourRemapInputBitDepth + 7 ) >> 3 ) << 3, "pre_lut_coded_value[c][i]" ); - WRITE_CODE( sei.m_preLut[c][i].targetValue, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, "pre_lut_target_value[c][i]" ); - } - } + WRITE_CODE(sei.m_filmGrainBitDepthLumaMinus8, 3, "film_grain_bit_depth_luma_minus8"); + WRITE_CODE(sei.m_filmGrainBitDepthChromaMinus8, 3, "film_grain_bit_depth_chroma_minus8"); + WRITE_FLAG(sei.m_filmGrainFullRangeFlag, "film_grain_full_range_flag"); + WRITE_CODE(sei.m_filmGrainColourPrimaries, 8, "film_grain_colour_primaries"); + WRITE_CODE(sei.m_filmGrainTransferCharacteristics, 8, "film_grain_transfer_characteristics"); + WRITE_CODE(sei.m_filmGrainMatrixCoeffs, 8, "film_grain_matrix_coeffs"); } - WRITE_FLAG( sei.m_colourRemapMatrixPresentFlag, "colour_remap_matrix_present_flag" ); - if( sei.m_colourRemapMatrixPresentFlag ) + WRITE_CODE(sei.m_blendingModeId, 2, "blending_mode_id"); + WRITE_CODE(sei.m_log2ScaleFactor, 4, "log2_scale_factor"); + for (int c = 0; c<3; c++) { - WRITE_CODE( sei.m_log2MatrixDenom, 4, "log2_matrix_denom" ); - for( int c=0 ; c<3 ; c++ ) - { - for( int i=0 ; i<3 ; i++ ) - { - WRITE_SVLC( sei.m_colourRemapCoeffs[c][i], "colour_remap_coeffs[c][i]" ); - } - } + const SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c]; + const uint32_t numIntensityIntervals = (uint32_t)cm.intensityValues.size(); + const uint32_t numModelValues = cm.numModelValues; + WRITE_FLAG(sei.m_compModel[c].presentFlag && numIntensityIntervals>0 && numModelValues>0, "comp_model_present_flag[c]"); } - - for( int c=0 ; c<3 ; c++ ) + for (uint32_t c = 0; c<3; c++) { - WRITE_CODE( sei.m_postLutNumValMinus1[c], 8, "m_postLutNumValMinus1[c]" ); - if( sei.m_postLutNumValMinus1[c]>0 ) + const SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c]; + const uint32_t numIntensityIntervals = (uint32_t)cm.intensityValues.size(); + const uint32_t numModelValues = cm.numModelValues; + if 
(cm.presentFlag && numIntensityIntervals>0 && numModelValues>0) { - for( int i=0 ; i<=sei.m_postLutNumValMinus1[c] ; i++ ) + assert(numIntensityIntervals <= 256); + assert(numModelValues <= 256); + WRITE_CODE(numIntensityIntervals - 1, 8, "num_intensity_intervals_minus1[c]"); + WRITE_CODE(numModelValues - 1, 8, "num_model_values_minus1[c]"); + for (uint32_t interval = 0; interval<numIntensityIntervals; interval++) { - WRITE_CODE( sei.m_postLut[c][i].codedValue, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, "post_lut_coded_value[c][i]" ); - WRITE_CODE( sei.m_postLut[c][i].targetValue, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, "post_lut_target_value[c][i]" ); + const SEIFilmGrainCharacteristics::CompModelIntensityValues &cmiv = cm.intensityValues[interval]; + WRITE_CODE(cmiv.intensityIntervalLowerBound, 8, "intensity_interval_lower_bound[c][i]"); + WRITE_CODE(cmiv.intensityIntervalUpperBound, 8, "intensity_interval_upper_bound[c][i]"); + assert(cmiv.compModelValue.size() == numModelValues); + for (uint32_t j = 0; j<cm.numModelValues; j++) + { + WRITE_SVLC(cmiv.compModelValue[j], "comp_model_value[c][i]"); + } } } - } - } + } // for c + WRITE_FLAG(sei.m_filmGrainCharacteristicsPersistenceFlag, "film_grain_characteristics_persistence_flag"); + } // cancel flag } -void SEIWriter::xWriteSEIMasteringDisplayColourVolume(const SEIMasteringDisplayColourVolume& sei) +void SEIWriter::xWriteSEIContentLightLevelInfo(const SEIContentLightLevelInfo& sei) { - WRITE_CODE( sei.values.primaries[0][0], 16, "display_primaries_x[0]" ); - WRITE_CODE( sei.values.primaries[0][1], 16, "display_primaries_y[0]" ); - - WRITE_CODE( sei.values.primaries[1][0], 16, "display_primaries_x[1]" ); - WRITE_CODE( sei.values.primaries[1][1], 16, "display_primaries_y[1]" ); - - WRITE_CODE( sei.values.primaries[2][0], 16, "display_primaries_x[2]" ); - WRITE_CODE( sei.values.primaries[2][1], 16, "display_primaries_y[2]" ); - - WRITE_CODE( sei.values.whitePoint[0], 16, "white_point_x" ); - WRITE_CODE( 
sei.values.whitePoint[1], 16, "white_point_y" ); - - WRITE_CODE( sei.values.maxLuminance, 32, "max_display_mastering_luminance" ); - WRITE_CODE( sei.values.minLuminance, 32, "min_display_mastering_luminance" ); + WRITE_CODE( sei.m_maxContentLightLevel, 16, "max_content_light_level" ); + WRITE_CODE( sei.m_maxPicAverageLightLevel, 16, "max_pic_average_light_level" ); } - -void SEIWriter::xWriteByteAlign() +void SEIWriter::xWriteSEIAmbientViewingEnvironment(const SEIAmbientViewingEnvironment& sei) { - if( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0) - { - WRITE_FLAG( 1, "payload_bit_equal_to_one" ); - while( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 ) - { - WRITE_FLAG( 0, "payload_bit_equal_to_zero" ); - } - } + WRITE_CODE(sei.m_ambientIlluminance, 32, "ambient_illuminance" ); + WRITE_CODE(sei.m_ambientLightX, 16, "ambient_light_x" ); + WRITE_CODE(sei.m_ambientLightY, 16, "ambient_light_y" ); } -#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI -void SEIWriter::xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei) +void SEIWriter::xWriteSEIContentColourVolume(const SEIContentColourVolume &sei) { - WRITE_CODE(sei.m_preferredTransferCharacteristics, 8, "preferred_transfer_characteristics"); -} -#endif + WRITE_FLAG(sei.m_ccvCancelFlag, "ccv_cancel_flag"); + if (!sei.m_ccvCancelFlag) + { + WRITE_FLAG(sei.m_ccvPersistenceFlag, "ccv_persistence_flag"); + WRITE_FLAG(sei.m_ccvPrimariesPresentFlag, "ccv_primaries_present_flag"); + WRITE_FLAG(sei.m_ccvMinLuminanceValuePresentFlag, "ccv_min_luminance_value_present_flag"); + WRITE_FLAG(sei.m_ccvMaxLuminanceValuePresentFlag, "ccv_max_luminance_value_present_flag"); + WRITE_FLAG(sei.m_ccvAvgLuminanceValuePresentFlag, "ccv_avg_luminance_value_present_flag"); -void SEIWriter::xWriteSEIGreenMetadataInfo(const SEIGreenMetadataInfo& sei) -{ - WRITE_CODE(sei.m_greenMetadataType, 8, "green_metadata_type"); + if (sei.m_ccvPrimariesPresentFlag == true) + { + for (int i = 0; i < MAX_NUM_COMPONENT;
i++) + { + WRITE_SCODE((int32_t)sei.m_ccvPrimariesX[i], 32, "ccv_primaries_x[i]"); + WRITE_SCODE((int32_t)sei.m_ccvPrimariesY[i], 32, "ccv_primaries_y[i]"); + } + } - WRITE_CODE(sei.m_xsdMetricType, 8, "xsd_metric_type"); - WRITE_CODE(sei.m_xsdMetricValue, 16, "xsd_metric_value"); + if (sei.m_ccvMinLuminanceValuePresentFlag == true) + { + WRITE_CODE((uint32_t)sei.m_ccvMinLuminanceValue, 32, "ccv_min_luminance_value"); + } + if (sei.m_ccvMaxLuminanceValuePresentFlag == true) + { + WRITE_CODE((uint32_t)sei.m_ccvMaxLuminanceValue, 32, "ccv_max_luminance_value"); + } + if (sei.m_ccvAvgLuminanceValuePresentFlag == true) + { + WRITE_CODE((uint32_t)sei.m_ccvAvgLuminanceValue, 32, "ccv_avg_luminance_value"); + } + } } //! \} diff --git a/source/Lib/EncoderLib/SEIwrite.h b/source/Lib/EncoderLib/SEIwrite.h index f93a0192968930b97d220f5b6a51a5b733b156eb..77c97f36c60531cc93900e37418e9dab31f32393 100644 --- a/source/Lib/EncoderLib/SEIwrite.h +++ b/source/Lib/EncoderLib/SEIwrite.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved.
* * Redistribution and use in source and binary forms, with or without @@ -49,39 +49,35 @@ public: SEIWriter() {}; virtual ~SEIWriter() {}; - void writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, bool isNested); + void writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, HRD &hrd, bool isNested, const uint32_t temporalId); protected: void xWriteSEIuserDataUnregistered(const SEIuserDataUnregistered &sei); - void xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei); - void xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SPS *sps); + void xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId); void xWriteSEIDecodedPictureHash(const SEIDecodedPictureHash& sei); - void xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei, const SPS *sps); - void xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SPS *sps); - void xWriteSEIRecoveryPoint(const SEIRecoveryPoint& sei); + void xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei); + void xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId); + void xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei); + void xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication& sei); void xWriteSEIFramePacking(const SEIFramePacking& sei); - void xWriteSEISegmentedRectFramePacking(const SEISegmentedRectFramePacking& sei); - void xWriteSEIDisplayOrientation(const SEIDisplayOrientation &sei); - void xWriteSEITemporalLevel0Index(const SEITemporalLevel0Index &sei); - void xWriteSEIGradualDecodingRefreshInfo(const SEIGradualDecodingRefreshInfo &sei); - void xWriteSEINoDisplay(const SEINoDisplay &sei); - void xWriteSEIToneMappingInfo(const SEIToneMappingInfo& sei); - void xWriteSEISOPDescription(const SEISOPDescription& sei); - void xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei, const SPS *sps); 
-#if HEVC_TILES_WPP - void xWriteSEITempMotionConstrainedTileSets(const SEITempMotionConstrainedTileSets& sei); -#endif - void xWriteSEITimeCode(const SEITimeCode& sei); - void xWriteSEIChromaResamplingFilterHint(const SEIChromaResamplingFilterHint& sei); - void xWriteSEIKneeFunctionInfo(const SEIKneeFunctionInfo &sei); - void xWriteSEIColourRemappingInfo(const SEIColourRemappingInfo& sei); void xWriteSEIMasteringDisplayColourVolume( const SEIMasteringDisplayColourVolume& sei); #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI void xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei); #endif - void xWriteSEIGreenMetadataInfo(const SEIGreenMetadataInfo &sei); + void xWriteSEIEquirectangularProjection (const SEIEquirectangularProjection &sei); + void xWriteSEISphereRotation (const SEISphereRotation &sei); + void xWriteSEIOmniViewport (const SEIOmniViewport& sei); + void xWriteSEIRegionWisePacking (const SEIRegionWisePacking &sei); + void xWriteSEIGeneralizedCubemapProjection (const SEIGeneralizedCubemapProjection &sei); + void xWriteSEISubpictureLevelInfo (const SEISubpicureLevelInfo &sei, const SPS* sps); + void xWriteSEISampleAspectRatioInfo (const SEISampleAspectRatioInfo &sei); - void xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps); + void xWriteSEIUserDataRegistered(const SEIUserDataRegistered& sei); + void xWriteSEIFilmGrainCharacteristics(const SEIFilmGrainCharacteristics& sei); + void xWriteSEIContentLightLevelInfo(const SEIContentLightLevelInfo& sei); + void xWriteSEIAmbientViewingEnvironment(const SEIAmbientViewingEnvironment& sei); + void xWriteSEIContentColourVolume(const SEIContentColourVolume &sei); + void xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps, HRD &hrd, const uint32_t temporalId); void xWriteByteAlign(); }; diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 
ae81af9f9443f6563f4e755aa4395baf944be319..d00d64a66981ec399ad5b9ddf0eda4bdbd0f9406 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,22 @@ #if ENABLE_TRACING +void VLCWriter::xWriteSCodeTr (int value, uint32_t length, const char *pSymbolName) +{ + xWriteSCode (value,length); + if( g_HLSTraceEnable ) + { + if( length<10 ) + { + DTRACE( g_trace_ctx, D_HEADER, "%-50s u(%d) : %d\n", pSymbolName, length, value ); + } + else + { + DTRACE( g_trace_ctx, D_HEADER, "%-50s u(%d) : %d\n", pSymbolName, length, value ); + } + } +} + void VLCWriter::xWriteCodeTr (uint32_t value, uint32_t length, const char *pSymbolName) { xWriteCode (value,length); @@ -98,10 +114,16 @@ bool g_HLSTraceEnable = true; #endif +void VLCWriter::xWriteSCode ( int code, uint32_t length ) +{ + assert ( length > 0 && length<=32 ); + assert( length==32 || (code>=-(1<<(length-1)) && code<(1<<(length-1))) ); + m_pcBitIf->write( length==32 ?
uint32_t(code) : ( uint32_t(code)&((uint32_t(1)<<length)-1) ), length ); +} void VLCWriter::xWriteCode ( uint32_t uiCode, uint32_t uiLength ) { - CHECK( uiLength == 0, "Code of lenght '0' not supported" ); + CHECK( uiLength == 0, "Code of length '0' not supported" ); m_pcBitIf->write( uiCode, uiLength ); } @@ -157,117 +179,235 @@ void AUDWriter::codeAUD(OutputBitstream& bs, const int pictureType) xWriteRbspTrailingBits(); } -void HLSWriter::xCodeShortTermRefPicSet( const ReferencePictureSet* rps, bool calledFromSliceHeader, int idx) +void HLSWriter::xCodeRefPicList( const ReferencePictureList* rpl, bool isLongTermPresent, uint32_t ltLsbBitsCount, const bool isForbiddenZeroDeltaPoc ) { - //int lastBits = getNumberOfWrittenBits(); + uint32_t numRefPic = rpl->getNumberOfShorttermPictures() + rpl->getNumberOfLongtermPictures() + rpl->getNumberOfInterLayerPictures(); + WRITE_UVLC( numRefPic, "num_ref_entries[ listIdx ][ rplsIdx ]" ); - if (idx > 0) + if (isLongTermPresent) { - WRITE_FLAG( rps->getInterRPSPrediction(), "inter_ref_pic_set_prediction_flag" ); // inter_RPS_prediction_flag + WRITE_FLAG(rpl->getLtrpInSliceHeaderFlag(), "ltrp_in_slice_header_flag[ listIdx ][ rplsIdx ]"); } - if (rps->getInterRPSPrediction()) + int prevDelta = MAX_INT; + int deltaValue = 0; + bool firstSTRP = true; + for (int ii = 0; ii < numRefPic; ii++) { - int deltaRPS = rps->getDeltaRPS(); - if(calledFromSliceHeader) + if( rpl->getInterLayerPresentFlag() ) { - WRITE_UVLC( rps->getDeltaRIdxMinus1(), "delta_idx_minus1" ); // delta index of the Reference Picture Set used for prediction minus 1 + WRITE_FLAG( rpl->isInterLayerRefPic( ii ), "inter_layer_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" ); + + if( rpl->isInterLayerRefPic( ii ) ) + { + CHECK( rpl->getInterLayerRefPicIdx( ii ) < 0, "Wrong inter-layer reference index" ); + WRITE_UVLC( rpl->getInterLayerRefPicIdx( ii ), "ilrp_idx[ listIdx ][ rplsIdx ][ i ]" ); + } } - WRITE_CODE( (deltaRPS >=0 ?
0: 1), 1, "delta_rps_sign" ); //delta_rps_sign - WRITE_UVLC( abs(deltaRPS) - 1, "abs_delta_rps_minus1"); // absolute delta RPS minus 1 + if( !rpl->isInterLayerRefPic( ii ) ) + { + if( isLongTermPresent ) + { + WRITE_FLAG( !rpl->isRefPicLongterm( ii ), "st_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" ); + } - for(int j=0; j < rps->getNumRefIdc(); j++) + if (!rpl->isRefPicLongterm(ii)) { - int refIdc = rps->getRefIdc(j); - WRITE_CODE( (refIdc==1? 1: 0), 1, "used_by_curr_pic_flag" ); //first bit is "1" if Idc is 1 - if (refIdc != 1) + if (firstSTRP) + { + firstSTRP = false; + deltaValue = prevDelta = rpl->getRefPicIdentifier(ii); + } + else { - WRITE_CODE( refIdc>>1, 1, "use_delta_flag" ); //second bit is "1" if Idc is 2, "0" otherwise. + deltaValue = rpl->getRefPicIdentifier(ii) - prevDelta; + prevDelta = rpl->getRefPicIdentifier(ii); } + unsigned int absDeltaValue = (deltaValue < 0) ? 0 - deltaValue : deltaValue; + if( isForbiddenZeroDeltaPoc ) + { + CHECK( !absDeltaValue, "Zero delta POC is not used without WP" ); + WRITE_UVLC( absDeltaValue - 1, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]" ); + } + else + WRITE_UVLC(absDeltaValue, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]"); + if (absDeltaValue > 0) + WRITE_FLAG((deltaValue < 0) ? 
0 : 1, "strp_entry_sign_flag[ listIdx ][ rplsIdx ][ i ]"); //0 means negative delta POC : 1 means positive } - } - else - { - WRITE_UVLC( rps->getNumberOfNegativePictures(), "num_negative_pics" ); - WRITE_UVLC( rps->getNumberOfPositivePictures(), "num_positive_pics" ); - int prev = 0; - for(int j=0 ; j < rps->getNumberOfNegativePictures(); j++) + else if (!rpl->getLtrpInSliceHeaderFlag()) { - WRITE_UVLC( prev-rps->getDeltaPOC(j)-1, "delta_poc_s0_minus1" ); - prev = rps->getDeltaPOC(j); - WRITE_FLAG( rps->getUsed(j), "used_by_curr_pic_s0_flag"); + WRITE_CODE(rpl->getRefPicIdentifier(ii), ltLsbBitsCount, "poc_lsb_lt[listIdx][rplsIdx][i]"); } - prev = 0; - for(int j=rps->getNumberOfNegativePictures(); j < rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures(); j++) - { - WRITE_UVLC( rps->getDeltaPOC(j)-prev-1, "delta_poc_s1_minus1" ); - prev = rps->getDeltaPOC(j); - WRITE_FLAG( rps->getUsed(j), "used_by_curr_pic_s1_flag" ); } } - - //DTRACE( g_trace_ctx, D_RPSINFO, "irps=%d (%2d bits) ", rps->getInterRPSPrediction(), getNumberOfWrittenBits() - lastBits ); - rps->printDeltaPOC(); } -void HLSWriter::codePPS( const PPS* pcPPS ) +void HLSWriter::codePPS( const PPS* pcPPS, const SPS* pcSPS ) { #if ENABLE_TRACING xTracePPSHeader (); #endif WRITE_UVLC( pcPPS->getPPSId(), "pps_pic_parameter_set_id" ); - WRITE_UVLC( pcPPS->getSPSId(), "pps_seq_parameter_set_id" ); -#if HEVC_DEPENDENT_SLICES - WRITE_FLAG( pcPPS->getDependentSliceSegmentsEnabledFlag() ? 
1 : 0, "dependent_slice_segments_enabled_flag" ); -#endif + WRITE_CODE( pcPPS->getSPSId(), 4, "pps_seq_parameter_set_id" ); + + WRITE_UVLC( pcPPS->getPicWidthInLumaSamples(), "pic_width_in_luma_samples" ); + WRITE_UVLC( pcPPS->getPicHeightInLumaSamples(), "pic_height_in_luma_samples" ); + Window conf = pcPPS->getConformanceWindow(); + + WRITE_FLAG( conf.getWindowEnabledFlag(), "conformance_window_flag" ); + if( conf.getWindowEnabledFlag() ) + { + WRITE_UVLC( conf.getWindowLeftOffset(), "conf_win_left_offset" ); + WRITE_UVLC( conf.getWindowRightOffset(), "conf_win_right_offset" ); + WRITE_UVLC( conf.getWindowTopOffset(), "conf_win_top_offset" ); + WRITE_UVLC( conf.getWindowBottomOffset(), "conf_win_bottom_offset" ); + } + Window scalingWindow = pcPPS->getScalingWindow(); + + WRITE_FLAG( scalingWindow.getWindowEnabledFlag(), "scaling_window_flag" ); + if( scalingWindow.getWindowEnabledFlag() ) + { + WRITE_UVLC( scalingWindow.getWindowLeftOffset(), "scaling_win_left_offset" ); + WRITE_UVLC( scalingWindow.getWindowRightOffset(), "scaling_win_right_offset" ); + WRITE_UVLC( scalingWindow.getWindowTopOffset(), "scaling_win_top_offset" ); + WRITE_UVLC( scalingWindow.getWindowBottomOffset(), "scaling_win_bottom_offset" ); + } + WRITE_FLAG( pcPPS->getOutputFlagPresentFlag() ? 1 : 0, "output_flag_present_flag" ); - WRITE_CODE( pcPPS->getNumExtraSliceHeaderBits(), 3, "num_extra_slice_header_bits"); + WRITE_FLAG(pcPPS->getSubPicIdSignallingPresentFlag(), "pps_subpic_id_signalling_present_flag"); + if( pcPPS->getSubPicIdSignallingPresentFlag() ) + { + WRITE_UVLC( pcPPS->getNumSubPics() - 1, "pps_num_subpics_minus1" ); + + WRITE_UVLC( pcPPS->getSubPicIdLen() - 1, "pps_subpic_id_len_minus1" ); + + for( int picIdx = 0; picIdx < pcPPS->getNumSubPics( ); picIdx++ ) + { + WRITE_CODE( pcPPS->getSubPicId(picIdx), pcPPS->getSubPicIdLen( ), "pps_subpic_id[i]" ); + } + } + + WRITE_FLAG( pcPPS->getNoPicPartitionFlag( ) ? 
1 : 0, "no_pic_partition_flag" ); + if( !pcPPS->getNoPicPartitionFlag() ) + { + int colIdx, rowIdx; + + // CTU size - required to match size in SPS + WRITE_CODE( pcPPS->getLog2CtuSize() - 5, 2, "pps_log2_ctu_size_minus5" ); + + // number of explicit tile columns/rows + WRITE_UVLC( pcPPS->getNumExpTileColumns() - 1, "num_exp_tile_columns_minus1" ); + WRITE_UVLC( pcPPS->getNumExpTileRows() - 1, "num_exp_tile_rows_minus1" ); + + // tile sizes + for( colIdx = 0; colIdx < pcPPS->getNumExpTileColumns(); colIdx++ ) + { + WRITE_UVLC( pcPPS->getTileColumnWidth( colIdx ) - 1, "tile_column_width_minus1[i]" ); + } + for( rowIdx = 0; rowIdx < pcPPS->getNumExpTileRows(); rowIdx++ ) + { + WRITE_UVLC( pcPPS->getTileRowHeight( rowIdx ) - 1, "tile_row_height_minus1[i]" ); + } + + // rectangular slice signalling + WRITE_FLAG( pcPPS->getRectSliceFlag( ) ? 1 : 0, "rect_slice_flag"); + if (pcPPS->getRectSliceFlag()) + { + WRITE_FLAG(pcPPS->getSingleSlicePerSubPicFlag( ) ? 1 : 0, "single_slice_per_subpic_flag"); + } + if (pcPPS->getRectSliceFlag() && !(pcPPS->getSingleSlicePerSubPicFlag())) + { + WRITE_UVLC( pcPPS->getNumSlicesInPic( ) - 1, "num_slices_in_pic_minus1" ); + WRITE_FLAG( pcPPS->getTileIdxDeltaPresentFlag( ) ?
1 : 0, "tile_idx_delta_present_flag"); + + // write rectangular slice parameters + for( int i = 0; i < pcPPS->getNumSlicesInPic()-1; i++ ) + { + // complete tiles within a single slice + WRITE_UVLC( pcPPS->getSliceWidthInTiles( i ) - 1, "slice_width_in_tiles_minus1[i]" ); +#if JVET_Q0480_RASTER_RECT_SLICES + if( pcPPS->getTileIdxDeltaPresentFlag() || ( (pcPPS->getSliceTileIdx( i ) % pcPPS->getNumTileColumns()) == 0 ) ) + { + WRITE_UVLC( pcPPS->getSliceHeightInTiles( i ) - 1, "slice_height_in_tiles_minus1[i]" ); + } +#else + WRITE_UVLC( pcPPS->getSliceHeightInTiles( i ) - 1, "slice_height_in_tiles_minus1[i]" ); +#endif + + // multiple slices within a single tile special case + if( pcPPS->getSliceWidthInTiles( i ) == 1 && pcPPS->getSliceHeightInTiles( i ) == 1 ) + { + WRITE_UVLC( pcPPS->getNumSlicesInTile( i ) - 1, "num_slices_in_tile_minus1[i]" ); + uint32_t numSlicesInTile = pcPPS->getNumSlicesInTile( i ); + for( int j = 0; j < numSlicesInTile-1; j++ ) + { + WRITE_UVLC( pcPPS->getSliceHeightInCtu( i ) - 1, "slice_height_in_ctu_minus1[i]" ); + i++; + } + } + + // tile index offset to start of next slice + if( i < pcPPS->getNumSlicesInPic()-1 ) + { + if( pcPPS->getTileIdxDeltaPresentFlag() ) + { + int32_t tileIdxDelta = pcPPS->getSliceTileIdx( i + 1 ) - pcPPS->getSliceTileIdx( i ); + WRITE_SVLC( tileIdxDelta, "tile_idx_delta[i]" ); + } + } + } + } + + // loop filtering across slice/tile controls + WRITE_FLAG( pcPPS->getLoopFilterAcrossTilesEnabledFlag(), "loop_filter_across_tiles_enabled_flag"); + WRITE_FLAG( pcPPS->getLoopFilterAcrossSlicesEnabledFlag(), "loop_filter_across_slices_enabled_flag"); + } + + WRITE_FLAG( pcPPS->getEntropyCodingSyncEnabledFlag() ? 1 : 0, "entropy_coding_sync_enabled_flag" ); WRITE_FLAG( pcPPS->getCabacInitPresentFlag() ? 
1 : 0, "cabac_init_present_flag" ); WRITE_UVLC( pcPPS->getNumRefIdxL0DefaultActive()-1, "num_ref_idx_l0_default_active_minus1"); WRITE_UVLC( pcPPS->getNumRefIdxL1DefaultActive()-1, "num_ref_idx_l1_default_active_minus1"); + WRITE_FLAG(pcPPS->getRpl1IdxPresentFlag() ? 1 : 0, "rpl1IdxPresentFlag"); + WRITE_SVLC( pcPPS->getPicInitQPMinus26(), "init_qp_minus26"); - WRITE_FLAG( pcPPS->getConstrainedIntraPred() ? 1 : 0, "constrained_intra_pred_flag" ); - WRITE_FLAG( pcPPS->getUseTransformSkip() ? 1 : 0, "transform_skip_enabled_flag" ); + WRITE_UVLC( pcPPS->getLog2MaxTransformSkipBlockSize() - 2, "log2_transform_skip_max_size_minus2"); WRITE_FLAG( pcPPS->getUseDQP() ? 1 : 0, "cu_qp_delta_enabled_flag" ); - if ( pcPPS->getUseDQP() ) - { - WRITE_UVLC( pcPPS->getCuQpDeltaSubdiv(), "cu_qp_delta_subdiv" ); - } WRITE_SVLC( pcPPS->getQpOffset(COMPONENT_Cb), "pps_cb_qp_offset" ); WRITE_SVLC( pcPPS->getQpOffset(COMPONENT_Cr), "pps_cr_qp_offset" ); + if (pcSPS->getJointCbCrEnabledFlag() == false || pcSPS->getChromaFormatIdc() == CHROMA_400) + { + CHECK(pcPPS->getJointCbCrQpOffsetPresentFlag(), "pps_jcbcr_qp_offset_present_flag should be false"); + } + WRITE_FLAG(pcPPS->getJointCbCrQpOffsetPresentFlag() ? 1 : 0, "pps_joint_cbcr_qp_offset_present_flag"); + if (pcPPS->getJointCbCrQpOffsetPresentFlag()) + { + WRITE_SVLC(pcPPS->getQpOffset(JOINT_CbCr), "pps_joint_cbcr_qp_offset_value"); + } WRITE_FLAG( pcPPS->getSliceChromaQpFlag() ? 1 : 0, "pps_slice_chroma_qp_offsets_present_flag" ); - WRITE_FLAG( pcPPS->getUseWP() ? 1 : 0, "weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) - WRITE_FLAG( pcPPS->getWPBiPred() ? 1 : 0, "weighted_bipred_flag" ); // Use of Weighting Bi-Prediction (B_SLICE) - WRITE_FLAG( pcPPS->getTransquantBypassEnabledFlag() ? 1 : 0, "transquant_bypass_enabled_flag" ); -#if HEVC_TILES_WPP - WRITE_FLAG( pcPPS->getTilesEnabledFlag() ? 1 : 0, "tiles_enabled_flag" ); - WRITE_FLAG( pcPPS->getEntropyCodingSyncEnabledFlag() ? 
1 : 0, "entropy_coding_sync_enabled_flag" ); - if( pcPPS->getTilesEnabledFlag() ) + WRITE_FLAG(uint32_t(pcPPS->getCuChromaQpOffsetEnabledFlag()), "cu_chroma_qp_offset_enabled_flag" ); + if (pcPPS->getCuChromaQpOffsetEnabledFlag()) { - WRITE_UVLC( pcPPS->getNumTileColumnsMinus1(), "num_tile_columns_minus1" ); - WRITE_UVLC( pcPPS->getNumTileRowsMinus1(), "num_tile_rows_minus1" ); - WRITE_FLAG( pcPPS->getTileUniformSpacingFlag(), "uniform_spacing_flag" ); - if( !pcPPS->getTileUniformSpacingFlag() ) + WRITE_UVLC(pcPPS->getChromaQpOffsetListLen() - 1, "chroma_qp_offset_list_len_minus1"); + /* skip zero index */ + for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx < pcPPS->getChromaQpOffsetListLen(); cuChromaQpOffsetIdx++) { - for(uint32_t i=0; i<pcPPS->getNumTileColumnsMinus1(); i++) - { - WRITE_UVLC( pcPPS->getTileColumnWidth(i)-1, "column_width_minus1" ); - } - for(uint32_t i=0; i<pcPPS->getNumTileRowsMinus1(); i++) + WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CbOffset, "cb_qp_offset_list[i]"); + WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CrOffset, "cr_qp_offset_list[i]"); + if (pcPPS->getJointCbCrQpOffsetPresentFlag()) { - WRITE_UVLC( pcPPS->getTileRowHeight(i)-1, "row_height_minus1" ); + WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx + 1).u.comp.JointCbCrOffset, "joint_cbcr_qp_offset_list[i]"); } } - CHECK ((pcPPS->getNumTileColumnsMinus1() + pcPPS->getNumTileRowsMinus1()) == 0, "Invalid tile parameters read"); - WRITE_FLAG( pcPPS->getLoopFilterAcrossTilesEnabledFlag()?1 : 0, "loop_filter_across_tiles_enabled_flag"); } -#endif - WRITE_FLAG( pcPPS->getLoopFilterAcrossSlicesEnabledFlag()?1 : 0, "pps_loop_filter_across_slices_enabled_flag"); + + WRITE_FLAG( pcPPS->getUseWP() ? 1 : 0, "weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) + WRITE_FLAG( pcPPS->getWPBiPred() ? 
1 : 0, "weighted_bipred_flag" ); // Use of Weighting Bi-Prediction (B_SLICE) + WRITE_FLAG( pcPPS->getDeblockingFilterControlPresentFlag()?1 : 0, "deblocking_filter_control_present_flag"); if(pcPPS->getDeblockingFilterControlPresentFlag()) { @@ -279,28 +419,23 @@ void HLSWriter::codePPS( const PPS* pcPPS ) WRITE_SVLC( pcPPS->getDeblockingFilterTcOffsetDiv2(), "pps_tc_offset_div2" ); } } -#if HEVC_USE_SCALING_LISTS - WRITE_FLAG( pcPPS->getScalingListPresentFlag() ? 1 : 0, "pps_scaling_list_data_present_flag" ); - if( pcPPS->getScalingListPresentFlag() ) - { - codeScalingList( pcPPS->getScalingList() ); + WRITE_FLAG( pcPPS->getConstantSliceHeaderParamsEnabledFlag(), "constant_slice_header_params_enabled_flag"); + if ( pcPPS->getConstantSliceHeaderParamsEnabledFlag() ) { + WRITE_CODE( pcPPS->getPPSDepQuantEnabledIdc(), 2, "pps_dep_quant_enabled_idc"); + WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc0(), 2, "pps_ref_pic_list_sps_idc[0]"); + WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc1(), 2, "pps_ref_pic_list_sps_idc[1]"); + WRITE_CODE( pcPPS->getPPSMvdL1ZeroIdc(), 2, "pps_mvd_l1_zero_idc"); + WRITE_CODE( pcPPS->getPPSCollocatedFromL0Idc(), 2, "pps_collocated_from_l0_idc"); + WRITE_UVLC( pcPPS->getPPSSixMinusMaxNumMergeCandPlus1(), "pps_six_minus_max_num_merge_cand_plus1"); + WRITE_UVLC( pcPPS->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(), "pps_max_num_merge_cand_minus_max_num_triangle_cand_plus1"); } -#endif - WRITE_FLAG( pcPPS->getListsModificationPresentFlag(), "lists_modification_present_flag"); - WRITE_UVLC( pcPPS->getLog2ParallelMergeLevelMinus2(), "log2_parallel_merge_level_minus2"); - WRITE_FLAG( pcPPS->getSliceHeaderExtensionPresentFlag() ? 
1 : 0, "slice_segment_header_extension_present_flag"); - bool pps_extension_present_flag=false; - bool pps_extension_flags[NUM_PPS_EXTENSION_FLAGS]={false}; - pps_extension_flags[PPS_EXT__REXT] = pcPPS->getPpsRangeExtension().settingsDifferFromDefaults(pcPPS->getUseTransformSkip()); + WRITE_FLAG( pcPPS->getPictureHeaderExtensionPresentFlag() ? 1 : 0, "picture_header_extension_present_flag"); + WRITE_FLAG( pcPPS->getSliceHeaderExtensionPresentFlag() ? 1 : 0, "slice_header_extension_present_flag"); - // Other PPS extension flags checked here. - - for(int i=0; i<NUM_PPS_EXTENSION_FLAGS; i++) - { - pps_extension_present_flag|=pps_extension_flags[i]; - } + bool pps_extension_present_flag=false; + bool pps_extension_flags[NUM_PPS_EXTENSION_FLAGS]={false}; WRITE_FLAG( (pps_extension_present_flag?1:0), "pps_extension_present_flag" ); @@ -331,26 +466,9 @@ void HLSWriter::codePPS( const PPS* pcPPS ) case PPS_EXT__REXT: { const PPSRExt &ppsRangeExtension = pcPPS->getPpsRangeExtension(); - if (pcPPS->getUseTransformSkip()) - { - WRITE_UVLC( ppsRangeExtension.getLog2MaxTransformSkipBlockSize()-2, "log2_max_transform_skip_block_size_minus2"); - } WRITE_FLAG((ppsRangeExtension.getCrossComponentPredictionEnabledFlag() ? 
1 : 0), "cross_component_prediction_enabled_flag" ); - WRITE_FLAG(uint32_t(ppsRangeExtension.getChromaQpOffsetListEnabledFlag()), "chroma_qp_offset_list_enabled_flag" ); - if (ppsRangeExtension.getChromaQpOffsetListEnabledFlag()) - { - WRITE_UVLC(ppsRangeExtension.getCuChromaQpOffsetSubdiv(), "cu_chroma_qp_offset_subdiv"); - WRITE_UVLC(ppsRangeExtension.getChromaQpOffsetListLen() - 1, "chroma_qp_offset_list_len_minus1"); - /* skip zero index */ - for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx < ppsRangeExtension.getChromaQpOffsetListLen(); cuChromaQpOffsetIdx++) - { - WRITE_SVLC(ppsRangeExtension.getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CbOffset, "cb_qp_offset_list[i]"); - WRITE_SVLC(ppsRangeExtension.getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CrOffset, "cr_qp_offset_list[i]"); - } - } - WRITE_UVLC( ppsRangeExtension.getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA), "log2_sao_offset_scale_luma" ); WRITE_UVLC( ppsRangeExtension.getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA), "log2_sao_offset_scale_chroma" ); } @@ -365,43 +483,121 @@ void HLSWriter::codePPS( const PPS* pcPPS ) xWriteRbspTrailingBits(); } -void HLSWriter::codeAPS( APS* pcAPS) +void HLSWriter::codeAPS( APS* pcAPS ) { #if ENABLE_TRACING xTraceAPSHeader(); #endif - AlfSliceParam param = pcAPS->getAlfAPSParam(); WRITE_CODE(pcAPS->getAPSId(), 5, "adaptation_parameter_set_id"); + WRITE_CODE( (int)pcAPS->getAPSType(), 3, "aps_params_type" ); + + if (pcAPS->getAPSType() == ALF_APS) + { + codeAlfAps(pcAPS); + } + else if (pcAPS->getAPSType() == LMCS_APS) + { + codeLmcsAps (pcAPS); + } + else if( pcAPS->getAPSType() == SCALING_LIST_APS ) + { + codeScalingListAps( pcAPS ); + } + WRITE_FLAG(0, "aps_extension_flag"); //Implementation when this flag is equal to 1 should be added when it is needed. 
Currently in the spec we don't have case when this flag is equal to 1 + xWriteRbspTrailingBits(); +} + +void HLSWriter::codeAlfAps( APS* pcAPS ) +{ + AlfParam param = pcAPS->getAlfAPSParam(); + + WRITE_FLAG(param.newFilterFlag[CHANNEL_TYPE_LUMA], "alf_luma_new_filter"); + WRITE_FLAG(param.newFilterFlag[CHANNEL_TYPE_CHROMA], "alf_chroma_new_filter"); + + if (param.newFilterFlag[CHANNEL_TYPE_LUMA]) + { +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_LUMA], "alf_luma_clip" ); +#else + WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_LUMA][0], "alf_luma_clip" ); +#endif - const int alfChromaIdc = param.enabledFlag[COMPONENT_Cb] * 2 + param.enabledFlag[COMPONENT_Cr]; - truncatedUnaryEqProb(alfChromaIdc, 3); // alf_chroma_idc + WRITE_UVLC(param.numLumaFilters - 1, "alf_luma_num_filters_signalled_minus1"); + if (param.numLumaFilters > 1) + { + const int length = ceilLog2( param.numLumaFilters); + for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) + { + WRITE_CODE(param.filterCoeffDeltaIdx[i], length, "alf_luma_coeff_delta_idx" ); + } + } + alfFilter(param, false, 0); - xWriteTruncBinCode(param.numLumaFilters - 1, MAX_NUM_ALF_CLASSES); //number_of_filters_minus1 - if (param.numLumaFilters > 1) + } + if (param.newFilterFlag[CHANNEL_TYPE_CHROMA]) { - for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) +#if JVET_Q0249_ALF_CHROMA_CLIPFLAG + WRITE_FLAG(param.nonLinearFlag[CHANNEL_TYPE_CHROMA], "alf_nonlinear_enable_flag_chroma"); +#endif + if( MAX_NUM_ALF_ALTERNATIVES_CHROMA > 1 ) + WRITE_UVLC( param.numAlternativesChroma - 1, "alf_chroma_num_alts_minus1" ); + for( int altIdx=0; altIdx < param.numAlternativesChroma; ++altIdx ) { - xWriteTruncBinCode((uint32_t)param.filterCoeffDeltaIdx[i], param.numLumaFilters); //filter_coeff_delta[i] +#if !JVET_Q0249_ALF_CHROMA_CLIPFLAG + WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx], "alf_nonlinear_enable_flag_chroma" ); +#endif + alfFilter(param, true, altIdx); } } +} - alfFilter(param, false); +void 
HLSWriter::codeLmcsAps( APS* pcAPS ) +{ + SliceReshapeInfo param = pcAPS->getReshaperAPSInfo(); + WRITE_UVLC(param.reshaperModelMinBinIdx, "lmcs_min_bin_idx"); + WRITE_UVLC(PIC_CODE_CW_BINS - 1 - param.reshaperModelMaxBinIdx, "lmcs_delta_max_bin_idx"); + assert(param.maxNbitsNeededDeltaCW > 0); + WRITE_UVLC(param.maxNbitsNeededDeltaCW - 1, "lmcs_delta_cw_prec_minus1"); - if (alfChromaIdc) + for (int i = param.reshaperModelMinBinIdx; i <= param.reshaperModelMaxBinIdx; i++) + { + int deltaCW = param.reshaperModelBinCWDelta[i]; + int signCW = (deltaCW < 0) ? 1 : 0; + int absCW = (deltaCW < 0) ? (-deltaCW) : deltaCW; + WRITE_CODE(absCW, param.maxNbitsNeededDeltaCW, "lmcs_delta_abs_cw[ i ]"); + if (absCW > 0) + { + WRITE_FLAG(signCW, "lmcs_delta_sign_cw_flag[ i ]"); + } + } + int deltaCRS = param.chrResScalingOffset; + int signCRS = (deltaCRS < 0) ? 1 : 0; + int absCRS = (deltaCRS < 0) ? (-deltaCRS) : deltaCRS; + WRITE_CODE(absCRS, 3, "lmcs_delta_crs_val"); + if (absCRS > 0) { - alfFilter(param, true); + WRITE_FLAG(signCRS, "lmcs_delta_crs_val_flag"); } - xWriteRbspTrailingBits(); } + +void HLSWriter::codeScalingListAps( APS* pcAPS ) +{ + ScalingList param = pcAPS->getScalingList(); + codeScalingList( param ); +} + void HLSWriter::codeVUI( const VUI *pcVUI, const SPS* pcSPS ) { #if ENABLE_TRACING DTRACE( g_trace_ctx, D_HEADER, "----------- vui_parameters -----------\n"); #endif + + WRITE_FLAG(pcVUI->getAspectRatioInfoPresentFlag(), "aspect_ratio_info_present_flag"); if (pcVUI->getAspectRatioInfoPresentFlag()) { + WRITE_FLAG(pcVUI->getAspectRatioConstantFlag(), "aspect_ratio_constant_flag"); WRITE_CODE(pcVUI->getAspectRatioIdc(), 8, "aspect_ratio_idc" ); if (pcVUI->getAspectRatioIdc() == 255) { @@ -409,108 +605,52 @@ void HLSWriter::codeVUI( const VUI *pcVUI, const SPS* pcSPS ) WRITE_CODE(pcVUI->getSarHeight(), 16, "sar_height"); } } - WRITE_FLAG(pcVUI->getOverscanInfoPresentFlag(), "overscan_info_present_flag"); - if (pcVUI->getOverscanInfoPresentFlag()) - { - 
WRITE_FLAG(pcVUI->getOverscanAppropriateFlag(), "overscan_appropriate_flag"); - } - WRITE_FLAG(pcVUI->getVideoSignalTypePresentFlag(), "video_signal_type_present_flag"); - if (pcVUI->getVideoSignalTypePresentFlag()) + WRITE_FLAG(pcVUI->getColourDescriptionPresentFlag(), "colour_description_present_flag"); + if (pcVUI->getColourDescriptionPresentFlag()) { - WRITE_CODE(pcVUI->getVideoFormat(), 3, "video_format"); - WRITE_FLAG(pcVUI->getVideoFullRangeFlag(), "video_full_range_flag"); - WRITE_FLAG(pcVUI->getColourDescriptionPresentFlag(), "colour_description_present_flag"); - if (pcVUI->getColourDescriptionPresentFlag()) - { - WRITE_CODE(pcVUI->getColourPrimaries(), 8, "colour_primaries"); - WRITE_CODE(pcVUI->getTransferCharacteristics(), 8, "transfer_characteristics"); - WRITE_CODE(pcVUI->getMatrixCoefficients(), 8, "matrix_coeffs"); - } + WRITE_CODE(pcVUI->getColourPrimaries(), 8, "colour_primaries"); + WRITE_CODE(pcVUI->getTransferCharacteristics(), 8, "transfer_characteristics"); + WRITE_CODE(pcVUI->getMatrixCoefficients(), 8, "matrix_coeffs"); + WRITE_FLAG(pcVUI->getVideoFullRangeFlag(), "video_full_range_flag"); } - + WRITE_FLAG(pcVUI->getFieldSeqFlag(), "field_seq_flag"); WRITE_FLAG(pcVUI->getChromaLocInfoPresentFlag(), "chroma_loc_info_present_flag"); if (pcVUI->getChromaLocInfoPresentFlag()) { - WRITE_UVLC(pcVUI->getChromaSampleLocTypeTopField(), "chroma_sample_loc_type_top_field"); - WRITE_UVLC(pcVUI->getChromaSampleLocTypeBottomField(), "chroma_sample_loc_type_bottom_field"); - } - - WRITE_FLAG(pcVUI->getNeutralChromaIndicationFlag(), "neutral_chroma_indication_flag"); - WRITE_FLAG(pcVUI->getFieldSeqFlag(), "field_seq_flag"); - WRITE_FLAG(pcVUI->getFrameFieldInfoPresentFlag(), "frame_field_info_present_flag"); - - Window defaultDisplayWindow = pcVUI->getDefaultDisplayWindow(); - WRITE_FLAG(defaultDisplayWindow.getWindowEnabledFlag(), "default_display_window_flag"); - if( defaultDisplayWindow.getWindowEnabledFlag() ) - { - 
WRITE_UVLC(defaultDisplayWindow.getWindowLeftOffset() / SPS::getWinUnitX(pcSPS->getChromaFormatIdc()), "def_disp_win_left_offset"); - WRITE_UVLC(defaultDisplayWindow.getWindowRightOffset() / SPS::getWinUnitX(pcSPS->getChromaFormatIdc()), "def_disp_win_right_offset"); - WRITE_UVLC(defaultDisplayWindow.getWindowTopOffset() / SPS::getWinUnitY(pcSPS->getChromaFormatIdc()), "def_disp_win_top_offset"); - WRITE_UVLC(defaultDisplayWindow.getWindowBottomOffset()/ SPS::getWinUnitY(pcSPS->getChromaFormatIdc()), "def_disp_win_bottom_offset"); - } - const TimingInfo *timingInfo = pcVUI->getTimingInfo(); - WRITE_FLAG(timingInfo->getTimingInfoPresentFlag(), "vui_timing_info_present_flag"); - if(timingInfo->getTimingInfoPresentFlag()) - { - WRITE_CODE(timingInfo->getNumUnitsInTick(), 32, "vui_num_units_in_tick"); - WRITE_CODE(timingInfo->getTimeScale(), 32, "vui_time_scale"); - WRITE_FLAG(timingInfo->getPocProportionalToTimingFlag(), "vui_poc_proportional_to_timing_flag"); - if(timingInfo->getPocProportionalToTimingFlag()) + if(pcVUI->getFieldSeqFlag()) { - WRITE_UVLC(timingInfo->getNumTicksPocDiffOneMinus1(), "vui_num_ticks_poc_diff_one_minus1"); + WRITE_UVLC(pcVUI->getChromaSampleLocTypeTopField(), "chroma_sample_loc_type_top_field"); + WRITE_UVLC(pcVUI->getChromaSampleLocTypeBottomField(), "chroma_sample_loc_type_bottom_field"); } - WRITE_FLAG(pcVUI->getHrdParametersPresentFlag(), "vui_hrd_parameters_present_flag"); - if( pcVUI->getHrdParametersPresentFlag() ) + else { - codeHrdParameters(pcVUI->getHrdParameters(), 1, pcSPS->getMaxTLayers() - 1 ); + WRITE_UVLC(pcVUI->getChromaSampleLocType(), "chroma_sample_loc_type"); } } - - WRITE_FLAG(pcVUI->getBitstreamRestrictionFlag(), "bitstream_restriction_flag"); - if (pcVUI->getBitstreamRestrictionFlag()) + WRITE_FLAG(pcVUI->getOverscanInfoPresentFlag(), "overscan_info_present_flag"); + if (pcVUI->getOverscanInfoPresentFlag()) { -#if HEVC_TILES_WPP - WRITE_FLAG(pcVUI->getTilesFixedStructureFlag(), "tiles_fixed_structure_flag"); 
-#endif - WRITE_FLAG(pcVUI->getMotionVectorsOverPicBoundariesFlag(), "motion_vectors_over_pic_boundaries_flag"); - WRITE_FLAG(pcVUI->getRestrictedRefPicListsFlag(), "restricted_ref_pic_lists_flag"); - WRITE_UVLC(pcVUI->getMinSpatialSegmentationIdc(), "min_spatial_segmentation_idc"); - WRITE_UVLC(pcVUI->getMaxBytesPerPicDenom(), "max_bytes_per_pic_denom"); - WRITE_UVLC(pcVUI->getMaxBitsPerMinCuDenom(), "max_bits_per_min_cu_denom"); - WRITE_UVLC(pcVUI->getLog2MaxMvLengthHorizontal(), "log2_max_mv_length_horizontal"); - WRITE_UVLC(pcVUI->getLog2MaxMvLengthVertical(), "log2_max_mv_length_vertical"); + WRITE_FLAG(pcVUI->getOverscanAppropriateFlag(), "overscan_appropriate_flag"); } } -void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1 ) +void HLSWriter::codeHrdParameters( const HRDParameters *hrd, const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1) { - if( commonInfPresentFlag ) + WRITE_FLAG( hrd->getNalHrdParametersPresentFlag() ? 1 : 0 , "general_nal_hrd_parameters_present_flag" ); + WRITE_FLAG( hrd->getVclHrdParametersPresentFlag() ? 1 : 0 , "general_vcl_hrd_parameters_present_flag" ); + WRITE_FLAG( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ? 1 : 0, "general_decoding_unit_hrd_params_present_flag" ); + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) { - WRITE_FLAG( hrd->getNalHrdParametersPresentFlag() ? 1 : 0 , "nal_hrd_parameters_present_flag" ); - WRITE_FLAG( hrd->getVclHrdParametersPresentFlag() ? 1 : 0 , "vcl_hrd_parameters_present_flag" ); - if( hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() ) - { - WRITE_FLAG( hrd->getSubPicCpbParamsPresentFlag() ? 
1 : 0, "sub_pic_hrd_params_present_flag" ); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - WRITE_CODE( hrd->getTickDivisorMinus2(), 8, "tick_divisor_minus2" ); - WRITE_CODE( hrd->getDuCpbRemovalDelayLengthMinus1(), 5, "du_cpb_removal_delay_increment_length_minus1" ); - WRITE_FLAG( hrd->getSubPicCpbParamsInPicTimingSEIFlag() ? 1 : 0, "sub_pic_cpb_params_in_pic_timing_sei_flag" ); - WRITE_CODE( hrd->getDpbOutputDelayDuLengthMinus1(), 5, "dpb_output_delay_du_length_minus1" ); - } - WRITE_CODE( hrd->getBitRateScale(), 4, "bit_rate_scale" ); - WRITE_CODE( hrd->getCpbSizeScale(), 4, "cpb_size_scale" ); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - WRITE_CODE( hrd->getDuCpbSizeScale(), 4, "du_cpb_size_scale" ); - } - WRITE_CODE( hrd->getInitialCpbRemovalDelayLengthMinus1(), 5, "initial_cpb_removal_delay_length_minus1" ); - WRITE_CODE( hrd->getCpbRemovalDelayLengthMinus1(), 5, "au_cpb_removal_delay_length_minus1" ); - WRITE_CODE( hrd->getDpbOutputDelayLengthMinus1(), 5, "dpb_output_delay_length_minus1" ); - } + WRITE_CODE( hrd->getTickDivisorMinus2(), 8, "tick_divisor_minus2" ); + } + WRITE_CODE( hrd->getBitRateScale(), 4, "bit_rate_scale" ); + WRITE_CODE( hrd->getCpbSizeScale(), 4, "cpb_size_scale" ); + if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) + { + WRITE_CODE( hrd->getCpbSizeDuScale(), 4, "cpb_size_du_scale" ); } - int i, j, nalOrVcl; - for( i = 0; i <= maxNumSubLayersMinus1; i ++ ) + + for( int i = firstSubLayer; i <= maxNumSubLayersMinus1; i ++ ) { WRITE_FLAG( hrd->getFixedPicRateFlag( i ) ? 
1 : 0, "fixed_pic_rate_general_flag"); bool fixedPixRateWithinCvsFlag = true; @@ -532,20 +672,15 @@ void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, ui WRITE_UVLC( hrd->getCpbCntMinus1( i ), "cpb_cnt_minus1"); } - for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) + for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) { if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) || ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) ) { - for( j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ ) + for( int j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ ) { WRITE_UVLC( hrd->getBitRateValueMinus1( i, j, nalOrVcl ), "bit_rate_value_minus1"); WRITE_UVLC( hrd->getCpbSizeValueMinus1( i, j, nalOrVcl ), "cpb_size_value_minus1"); - if( hrd->getSubPicCpbParamsPresentFlag() ) - { - WRITE_UVLC( hrd->getDuCpbSizeValueMinus1( i, j, nalOrVcl ), "cpb_size_du_value_minus1"); - WRITE_UVLC( hrd->getDuBitRateValueMinus1( i, j, nalOrVcl ), "bit_rate_du_value_minus1"); - } WRITE_FLAG( hrd->getCbrFlag( i, j, nalOrVcl ) ? 1 : 0, "cbr_flag"); } } @@ -553,91 +688,25 @@ void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, ui } } -void HLSWriter::codeReshaper(const SliceReshapeInfo& pSliceReshaperInfo, const SPS* pcSPS, const bool isIntra) -{ - WRITE_FLAG(pSliceReshaperInfo.getSliceReshapeModelPresentFlag() ? 
1 : 0, "tile_group_reshaper_model_present_flag"); - if (pSliceReshaperInfo.getSliceReshapeModelPresentFlag()) - { - WRITE_UVLC(pSliceReshaperInfo.reshaperModelMinBinIdx, "reshaper_model_min_bin_idx"); - WRITE_UVLC(PIC_CODE_CW_BINS - 1 - pSliceReshaperInfo.reshaperModelMaxBinIdx, "reshaper_model_delta_max_bin_idx"); - assert(pSliceReshaperInfo.maxNbitsNeededDeltaCW > 0); - WRITE_UVLC(pSliceReshaperInfo.maxNbitsNeededDeltaCW - 1, "reshaper_model_bin_delta_abs_cw_prec_minus1"); - - for (int i = pSliceReshaperInfo.reshaperModelMinBinIdx; i <= pSliceReshaperInfo.reshaperModelMaxBinIdx; i++) - { - int deltaCW = pSliceReshaperInfo.reshaperModelBinCWDelta[i]; - int signCW = (deltaCW < 0) ? 1 : 0; - int absCW = (deltaCW < 0) ? (-deltaCW) : deltaCW; - WRITE_CODE(absCW, pSliceReshaperInfo.maxNbitsNeededDeltaCW, "reshaper_model_bin_delta_abs_CW"); - if (absCW > 0) - { - WRITE_FLAG(signCW, "reshaper_model_bin_delta_sign_CW_flag"); - } - } - } - - WRITE_FLAG(pSliceReshaperInfo.getUseSliceReshaper() ? 1 : 0, "tile_group_reshaper_enable_flag"); - - if (!pSliceReshaperInfo.getUseSliceReshaper()) - return; - - if (!(pcSPS->getUseDualITree() && isIntra)) - WRITE_FLAG(pSliceReshaperInfo.getSliceReshapeChromaAdj(), "tile_group_reshaper_chroma_residual_scale_flag"); -}; void HLSWriter::codeSPS( const SPS* pcSPS ) { #if ENABLE_TRACING xTraceSPSHeader (); #endif -#if HEVC_VPS - WRITE_CODE( pcSPS->getVPSId (), 4, "sps_video_parameter_set_id" ); -#endif -#if !JVET_M0101_HLS - WRITE_UVLC( pcSPS->getSPSId (), "sps_seq_parameter_set_id" ); - - WRITE_FLAG(pcSPS->getIntraOnlyConstraintFlag() ? 1 : 0, "intra_only_constraint_flag"); - WRITE_CODE(pcSPS->getMaxBitDepthConstraintIdc(), 4, "max_bitdepth_constraint_idc"); - WRITE_CODE(pcSPS->getMaxChromaFormatConstraintIdc(), 2, "max_chroma_format_constraint_idc"); - WRITE_FLAG(pcSPS->getFrameConstraintFlag() ? 1 : 0, "frame_only_constraint_flag"); - WRITE_FLAG(pcSPS->getNoQtbttDualTreeIntraConstraintFlag() ? 
1 : 0, "no_qtbtt_dual_tree_intra_constraint_flag"); - WRITE_FLAG(pcSPS->getNoSaoConstraintFlag() ? 1 : 0, "no_sao_constraint_flag"); - WRITE_FLAG(pcSPS->getNoAlfConstraintFlag() ? 1 : 0, "no_alf_constraint_flag"); - WRITE_FLAG(pcSPS->getNoPcmConstraintFlag() ? 1 : 0, "no_pcm_constraint_flag"); - WRITE_FLAG(pcSPS->getNoRefWraparoundConstraintFlag() ? 1 : 0, "no_ref_wraparound_constraint_flag"); - WRITE_FLAG(pcSPS->getNoTemporalMvpConstraintFlag() ? 1 : 0, "no_temporal_mvp_constraint_flag"); - WRITE_FLAG(pcSPS->getNoSbtmvpConstraintFlag() ? 1 : 0, "no_sbtmvp_constraint_flag"); - WRITE_FLAG(pcSPS->getNoAmvrConstraintFlag() ? 1 : 0, "no_amvr_constraint_flag"); - WRITE_FLAG(pcSPS->getNoBdofConstraintFlag() ? 1 : 0, "no_bdof_constraint_flag"); - WRITE_FLAG(pcSPS->getNoCclmConstraintFlag() ? 1 : 0, "no_cclm_constraint_flag"); - WRITE_FLAG(pcSPS->getNoMtsConstraintFlag() ? 1 : 0, "no_mts_constraint_flag"); - WRITE_FLAG(pcSPS->getNoAffineMotionConstraintFlag() ? 1 : 0, "no_affine_motion_constraint_flag"); - WRITE_FLAG(pcSPS->getNoGbiConstraintFlag() ? 1 : 0, "no_gbi_constraint_flag"); - WRITE_FLAG(pcSPS->getNoMhIntraConstraintFlag() ? 1 : 0, "no_mh_intra_constraint_flag"); - WRITE_FLAG(pcSPS->getNoTriangleConstraintFlag() ? 1 : 0, "no_triangle_constraint_flag"); - WRITE_FLAG(pcSPS->getNoLadfConstraintFlag() ? 1 : 0, "no_ladf_constraint_flag"); - WRITE_FLAG(pcSPS->getNoCurrPicRefConstraintFlag() ? 1 : 0, "no_curr_pic_ref_constraint_flag"); - WRITE_FLAG(pcSPS->getNoQpDeltaConstraintFlag() ? 1 : 0, "no_qp_delta_constraint_flag"); - WRITE_FLAG(pcSPS->getNoDepQuantConstraintFlag() ? 1 : 0, "no_dep_quant_constraint_flag"); - WRITE_FLAG(pcSPS->getNoSignDataHidingConstraintFlag() ? 1 : 0, "no_sign_data_hiding_constraint_flag"); - - CHECK( pcSPS->getMaxTLayers() == 0, "Maximum number of temporal sub-layers is '0'" ); - WRITE_CODE( pcSPS->getMaxTLayers() - 1, 3, "sps_max_sub_layers_minus1" ); - - WRITE_FLAG( pcSPS->getTemporalIdNestingFlag() ? 
1 : 0, "sps_temporal_id_nesting_flag" ); - codePTL( pcSPS->getPTL(), true, pcSPS->getMaxTLayers() - 1 ); -#else + WRITE_CODE( pcSPS->getDecodingParameterSetId (), 4, "sps_decoding_parameter_set_id" ); + WRITE_CODE( pcSPS->getVPSId(), 4, "sps_video_parameter_set_id" ); CHECK(pcSPS->getMaxTLayers() == 0, "Maximum number of temporal sub-layers is '0'"); WRITE_CODE(pcSPS->getMaxTLayers() - 1, 3, "sps_max_sub_layers_minus1"); WRITE_CODE(0, 5, "sps_reserved_zero_5bits"); codeProfileTierLevel( pcSPS->getProfileTierLevel(), pcSPS->getMaxTLayers() - 1 ); + WRITE_FLAG(pcSPS->getGDREnabledFlag(), "gdr_enabled_flag"); - WRITE_UVLC(pcSPS->getSPSId (), "sps_seq_parameter_set_id"); -#endif + WRITE_CODE( pcSPS->getSPSId (), 4, "sps_seq_parameter_set_id" ); - WRITE_UVLC( int(pcSPS->getChromaFormatIdc ()), "chroma_format_idc" ); + WRITE_CODE(int(pcSPS->getChromaFormatIdc ()), 2, "chroma_format_idc"); const ChromaFormat format = pcSPS->getChromaFormatIdc(); if( format == CHROMA_444 ) @@ -645,30 +714,55 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( 0, "separate_colour_plane_flag"); } - WRITE_UVLC( pcSPS->getPicWidthInLumaSamples (), "pic_width_in_luma_samples" ); - WRITE_UVLC( pcSPS->getPicHeightInLumaSamples(), "pic_height_in_luma_samples" ); - Window conf = pcSPS->getConformanceWindow(); + WRITE_FLAG( pcSPS->getRprEnabledFlag(), "ref_pic_resampling_enabled_flag" ); - // KJS: not removing yet - WRITE_FLAG( conf.getWindowEnabledFlag(), "conformance_window_flag" ); - if (conf.getWindowEnabledFlag()) + WRITE_UVLC( pcSPS->getMaxPicWidthInLumaSamples(), "pic_width_max_in_luma_samples" ); + WRITE_UVLC( pcSPS->getMaxPicHeightInLumaSamples(), "pic_height_max_in_luma_samples" ); + WRITE_CODE(floorLog2(pcSPS->getCTUSize()) - 5, 2, "sps_log2_ctu_size_minus5"); + + WRITE_FLAG(pcSPS->getSubPicPresentFlag(), "subpics_present_flag"); + if(pcSPS->getSubPicPresentFlag()) { - WRITE_UVLC( conf.getWindowLeftOffset() / SPS::getWinUnitX(pcSPS->getChromaFormatIdc() ), "conf_win_left_offset" ); 
- WRITE_UVLC( conf.getWindowRightOffset() / SPS::getWinUnitX(pcSPS->getChromaFormatIdc() ), "conf_win_right_offset" ); - WRITE_UVLC( conf.getWindowTopOffset() / SPS::getWinUnitY(pcSPS->getChromaFormatIdc() ), "conf_win_top_offset" ); - WRITE_UVLC( conf.getWindowBottomOffset() / SPS::getWinUnitY(pcSPS->getChromaFormatIdc() ), "conf_win_bottom_offset" ); + WRITE_CODE(pcSPS->getNumSubPics() - 1, 8, "sps_num_subpics_minus1"); + for (int picIdx = 0; picIdx < pcSPS->getNumSubPics(); picIdx++) + { + WRITE_CODE( pcSPS->getSubPicCtuTopLeftX(picIdx), std::max(1, ceilLog2((( pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2( pcSPS->getCTUSize())))), "subpic_ctu_top_left_x[ i ]" ); + WRITE_CODE( pcSPS->getSubPicCtuTopLeftY(picIdx), std::max(1, ceilLog2((( pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2( pcSPS->getCTUSize())))), "subpic_ctu_top_left_y[ i ]" ); + WRITE_CODE( pcSPS->getSubPicWidth(picIdx) - 1, std::max(1, ceilLog2((( pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2( pcSPS->getCTUSize())))), "subpic_width_minus1[ i ]" ); + WRITE_CODE( pcSPS->getSubPicHeight(picIdx) - 1, std::max(1, ceilLog2((( pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2( pcSPS->getCTUSize())))), "subpic_height_minus1[ i ]" ); + WRITE_FLAG( pcSPS->getSubPicTreatedAsPicFlag(picIdx), "subpic_treated_as_pic_flag[ i ]" ); + WRITE_FLAG( pcSPS->getLoopFilterAcrossSubpicEnabledFlag(picIdx), "loop_filter_across_subpic_enabled_flag[ i ]" ); + } } - WRITE_UVLC( pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) - 8, "bit_depth_luma_minus8" ); + WRITE_FLAG( pcSPS->getSubPicIdPresentFlag(), "sps_subpic_id_present_flag"); + if( pcSPS->getSubPicIdPresentFlag() ) + { + WRITE_FLAG(pcSPS->getSubPicIdSignallingPresentFlag(), "sps_subpic_id_signalling_present_flag"); + if( pcSPS->getSubPicIdSignallingPresentFlag() ) + { + WRITE_UVLC( pcSPS->getSubPicIdLen( ) - 1, "sps_subpic_id_len_minus1" ); + for( int picIdx = 
0; picIdx < pcSPS->getNumSubPics( ); picIdx++ ) + { + WRITE_CODE( pcSPS->getSubPicId(picIdx), pcSPS->getSubPicIdLen( ), "sps_subpic_id[i]" ); + } + } + } - const bool chromaEnabled = isChromaEnabled(format); - WRITE_UVLC( chromaEnabled ? (pcSPS->getBitDepth(CHANNEL_TYPE_CHROMA) - 8):0, "bit_depth_chroma_minus8" ); + WRITE_UVLC( pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) - 8, "bit_depth_minus8" ); - WRITE_UVLC( pcSPS->getBitsForPOC()-4, "log2_max_pic_order_cnt_lsb_minus4" ); + WRITE_UVLC( pcSPS->getMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA), "min_qp_prime_ts_minus4" ); + + WRITE_FLAG( pcSPS->getUseWP() ? 1 : 0, "sps_weighted_pred_flag" ); // Use of Weighting Prediction (P_SLICE) + WRITE_FLAG( pcSPS->getUseWPBiPred() ? 1 : 0, "sps_weighted_bipred_flag" ); // Use of Weighting Bi-Prediction (B_SLICE) + WRITE_CODE(pcSPS->getBitsForPOC()-4, 4, "log2_max_pic_order_cnt_lsb_minus4"); // KJS: Marakech decision: sub-layers added back const bool subLayerOrderingInfoPresentFlag = 1; - WRITE_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag"); + if (pcSPS->getMaxTLayers() > 1) + { + WRITE_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag"); + } for(uint32_t i=0; i <= pcSPS->getMaxTLayers()-1; i++) { WRITE_UVLC( pcSPS->getMaxDecPicBuffering(i) - 1, "sps_max_dec_pic_buffering_minus1[i]" ); @@ -680,50 +774,107 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) } } CHECK( pcSPS->getMaxCUWidth() != pcSPS->getMaxCUHeight(), "Rectangular CTUs not supported" ); - WRITE_FLAG(pcSPS->getUseDualITree(), "qtbtt_dual_tree_intra_flag"); - WRITE_UVLC(g_aucLog2[pcSPS->getCTUSize()] - MIN_CU_LOG2, "log2_ctu_size_minus2"); + WRITE_FLAG(pcSPS->getLongTermRefsPresent() ? 1 : 0, "long_term_ref_pics_flag"); + WRITE_FLAG( pcSPS->getInterLayerPresentFlag() ? 1 : 0, "inter_layer_ref_pics_present_flag" ); + WRITE_FLAG(pcSPS->getIDRRefParamListPresent() ? 1 : 0, "sps_idr_rpl_present_flag" ); + WRITE_FLAG(pcSPS->getRPL1CopyFromRPL0Flag() ? 
1 : 0, "rpl1_copy_from_rpl0_flag"); + + const RPLList* rplList0 = pcSPS->getRPLList0(); + const RPLList* rplList1 = pcSPS->getRPLList1(); + + //Write candidate for List0 + uint32_t numberOfRPL = pcSPS->getNumRPL0(); + WRITE_UVLC(numberOfRPL, "num_ref_pic_lists_in_sps[0]"); + for (int ii = 0; ii < numberOfRPL; ii++) + { + const ReferencePictureList* rpl = rplList0->getReferencePictureList(ii); + xCodeRefPicList( rpl, pcSPS->getLongTermRefsPresent(), pcSPS->getBitsForPOC(), !pcSPS->getUseWP() && !pcSPS->getUseWPBiPred() ); + } + + //Write candidate for List1 + if (!pcSPS->getRPL1CopyFromRPL0Flag()) + { + numberOfRPL = pcSPS->getNumRPL1(); + WRITE_UVLC(numberOfRPL, "num_ref_pic_lists_in_sps[1]"); + for (int ii = 0; ii < numberOfRPL; ii++) + { + const ReferencePictureList* rpl = rplList1->getReferencePictureList(ii); + xCodeRefPicList( rpl, pcSPS->getLongTermRefsPresent(), pcSPS->getBitsForPOC(), !pcSPS->getUseWP() && !pcSPS->getUseWPBiPred() ); + } + } + if( pcSPS->getChromaFormatIdc() != CHROMA_400 ) + { + WRITE_FLAG(pcSPS->getUseDualITree(), "qtbtt_dual_tree_intra_flag"); + } WRITE_UVLC(pcSPS->getLog2MinCodingBlockSize() - 2, "log2_min_luma_coding_block_size_minus2"); WRITE_FLAG(pcSPS->getSplitConsOverrideEnabledFlag(), "partition_constraints_override_enabled_flag"); - WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(I_SLICE)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_tile_group_luma"); - WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(B_SLICE)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_inter_tile_group"); - WRITE_UVLC(pcSPS->getMaxBTDepth(), "sps_max_mtt_hierarchy_depth_inter_tile_group"); - WRITE_UVLC(pcSPS->getMaxBTDepthI(), "sps_max_mtt_hierarchy_depth_intra_tile_group_luma"); - if (pcSPS->getMaxBTDepthI() != 0) + WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(I_SLICE)) - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); + WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(B_SLICE)) - 
pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_inter_slice"); + WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepth(), "sps_max_mtt_hierarchy_depth_inter_slice"); + WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepthI(), "sps_max_mtt_hierarchy_depth_intra_slice_luma"); + if (pcSPS->getMaxMTTHierarchyDepthI() != 0) { - WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSizeI()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE)], "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma"); - WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSizeI()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE)], "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma"); + WRITE_UVLC(floorLog2(pcSPS->getMaxBTSizeI()) - floorLog2(pcSPS->getMinQTSize(I_SLICE)), "sps_log2_diff_max_bt_min_qt_intra_slice_luma"); + WRITE_UVLC(floorLog2(pcSPS->getMaxTTSizeI()) - floorLog2(pcSPS->getMinQTSize(I_SLICE)), "sps_log2_diff_max_tt_min_qt_intra_slice_luma"); } - if (pcSPS->getMaxBTDepth() != 0) + if (pcSPS->getMaxMTTHierarchyDepth() != 0) { - WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSize()] - g_aucLog2[pcSPS->getMinQTSize(B_SLICE)], "sps_log2_diff_max_bt_min_qt_inter_tile_group"); - WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSize()] - g_aucLog2[pcSPS->getMinQTSize(B_SLICE)], "sps_log2_diff_max_tt_min_qt_inter_tile_group"); + WRITE_UVLC(floorLog2(pcSPS->getMaxBTSize()) - floorLog2(pcSPS->getMinQTSize(B_SLICE)), "sps_log2_diff_max_bt_min_qt_inter_slice"); + WRITE_UVLC(floorLog2(pcSPS->getMaxTTSize()) - floorLog2(pcSPS->getMinQTSize(B_SLICE)), "sps_log2_diff_max_tt_min_qt_inter_slice"); } if (pcSPS->getUseDualITree()) { - WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma"); - WRITE_UVLC(pcSPS->getMaxBTDepthIChroma(), "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma"); - if (pcSPS->getMaxBTDepthIChroma() != 0) + WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - pcSPS->getLog2MinCodingBlockSize(), 
"sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); + WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepthIChroma(), "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); + if (pcSPS->getMaxMTTHierarchyDepthIChroma() != 0) { - WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSizeIChroma()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)], "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma"); - WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSizeIChroma()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)], "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma"); + WRITE_UVLC(floorLog2(pcSPS->getMaxBTSizeIChroma()) - floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); + WRITE_UVLC(floorLog2(pcSPS->getMaxTTSizeIChroma()) - floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); + } + } + + WRITE_FLAG( (pcSPS->getLog2MaxTbSize() - 5) ? 1 : 0, "sps_max_luma_transform_size_64_flag" ); + + WRITE_FLAG(pcSPS->getJointCbCrEnabledFlag(), "sps_joint_cbcr_enabled_flag"); + if (pcSPS->getChromaFormatIdc() != CHROMA_400) + { + const ChromaQpMappingTable& chromaQpMappingTable = pcSPS->getChromaQpMappingTable(); + WRITE_FLAG(chromaQpMappingTable.getSameCQPTableForAllChromaFlag(), "same_qp_table_for_chroma"); + int numQpTables = chromaQpMappingTable.getSameCQPTableForAllChromaFlag() ? 1 : (pcSPS->getJointCbCrEnabledFlag() ? 
3 : 2); + CHECK(numQpTables != chromaQpMappingTable.getNumQpTables(), " numQpTables does not match at encoder side "); + for (int i = 0; i < numQpTables; i++) + { + WRITE_SVLC(chromaQpMappingTable.getQpTableStartMinus26(i), "qp_table_starts_minus26"); + WRITE_UVLC(chromaQpMappingTable.getNumPtsInCQPTableMinus1(i), "num_points_in_qp_table_minus1"); + + for (int j = 0; j <= chromaQpMappingTable.getNumPtsInCQPTableMinus1(i); j++) + { + WRITE_UVLC(chromaQpMappingTable.getDeltaQpInValMinus1(i,j), "delta_qp_in_val_minus1"); + WRITE_UVLC(chromaQpMappingTable.getDeltaQpOutVal(i, j) ^ chromaQpMappingTable.getDeltaQpInValMinus1(i, j), + "delta_qp_diff_val"); + } } } -#if MAX_TB_SIZE_SIGNALLING - // KJS: Not in syntax - WRITE_UVLC( pcSPS->getLog2MaxTbSize() - 2, "log2_max_luma_transform_block_size_minus2" ); -#endif WRITE_FLAG( pcSPS->getSAOEnabledFlag(), "sps_sao_enabled_flag"); WRITE_FLAG( pcSPS->getALFEnabledFlag(), "sps_alf_enabled_flag" ); - WRITE_FLAG( pcSPS->getPCMEnabledFlag() ? 1 : 0, "sps_pcm_enabled_flag"); - if( pcSPS->getPCMEnabledFlag() ) + WRITE_FLAG(pcSPS->getTransformSkipEnabledFlag() ? 1 : 0, "sps_transform_skip_enabled_flag"); + if (pcSPS->getTransformSkipEnabledFlag()) + { + WRITE_FLAG(pcSPS->getBDPCMEnabled() ? 1 : 0, "sps_bdpcm_enabled_flag"); + if (pcSPS->getBDPCMEnabled() && pcSPS->getChromaFormatIdc() == CHROMA_444) + { + WRITE_FLAG(pcSPS->getBDPCMEnabled() == BDPCM_LUMACHROMA ? 1 : 0, "sps_bdpcm_enabled_chroma_flag"); + } + else + { + CHECK(pcSPS->getBDPCMEnabled() == BDPCM_LUMACHROMA, "BDPCM for chroma can be used for 444 only.") + } + } + else { - WRITE_CODE( pcSPS->getPCMBitDepth(CHANNEL_TYPE_LUMA) - 1, 4, "pcm_sample_bit_depth_luma_minus1" ); - WRITE_CODE( chromaEnabled ? 
(pcSPS->getPCMBitDepth(CHANNEL_TYPE_CHROMA) - 1) : 0, 4, "pcm_sample_bit_depth_chroma_minus1" ); - WRITE_UVLC( pcSPS->getPCMLog2MinSize() - 3, "log2_min_pcm_luma_coding_block_size_minus3" ); - WRITE_UVLC( pcSPS->getPCMLog2MaxSize() - pcSPS->getPCMLog2MinSize(), "log2_diff_max_min_pcm_luma_coding_block_size" ); - WRITE_FLAG( pcSPS->getPCMFilterDisableFlag()?1 : 0, "pcm_loop_filter_disable_flag"); + CHECK(pcSPS->getBDPCMEnabled()!=0, "BDPCM cannot be used when transform skip is disabled"); } WRITE_FLAG( pcSPS->getWrapAroundEnabledFlag() ? 1 : 0, "sps_ref_wraparound_enabled_flag" ); @@ -742,48 +893,70 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) WRITE_FLAG( pcSPS->getAMVREnabledFlag() ? 1 : 0, "sps_amvr_enabled_flag" ); WRITE_FLAG( pcSPS->getBDOFEnabledFlag() ? 1 : 0, "sps_bdof_enabled_flag" ); - - WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0, "sps_affine_amvr_enabled_flag" ); - - WRITE_FLAG( pcSPS->getUseDMVR() ? 1 : 0, "sps_dmvr_enable_flag" ); - - // KJS: sps_cclm_enabled_flag - WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0, "lm_chroma_enabled_flag" ); - if ( pcSPS->getUseLMChroma() && pcSPS->getChromaFormatIdc() == CHROMA_420 ) + if (pcSPS->getBDOFEnabledFlag()) + { + WRITE_FLAG(pcSPS->getBdofControlPresentFlag() ? 1 : 0, "sps_bdof_pic_present_flag"); + } + WRITE_FLAG( pcSPS->getUseSMVD() ? 1 : 0, "sps_smvd_enabled_flag" ); + WRITE_FLAG( pcSPS->getUseDMVR() ? 1 : 0, "sps_dmvr_enabled_flag" ); + if (pcSPS->getUseDMVR()) + { + WRITE_FLAG(pcSPS->getDmvrControlPresentFlag() ? 1 : 0, "sps_dmvr_pic_present_flag"); + } + WRITE_FLAG(pcSPS->getUseMMVD() ? 1 : 0, "sps_mmvd_enabled_flag"); + WRITE_FLAG( pcSPS->getUseISP() ? 1 : 0, "sps_isp_enabled_flag"); + WRITE_FLAG( pcSPS->getUseMRL() ? 1 : 0, "sps_mrl_enabled_flag"); + WRITE_FLAG( pcSPS->getUseMIP() ? 1 : 0, "sps_mip_enabled_flag"); + if( pcSPS->getChromaFormatIdc() != CHROMA_400) + { + WRITE_FLAG( pcSPS->getUseLMChroma() ? 
1 : 0, "sps_cclm_enabled_flag"); + } + if( pcSPS->getChromaFormatIdc() == CHROMA_420 ) { - WRITE_FLAG( pcSPS->getCclmCollocatedChromaFlag() ? 1 : 0, "sps_cclm_collocated_chroma_flag" ); + WRITE_FLAG( pcSPS->getHorCollocatedChromaFlag() ? 1 : 0, "sps_chroma_horizontal_collocated_flag" ); + WRITE_FLAG( pcSPS->getVerCollocatedChromaFlag() ? 1 : 0, "sps_chroma_vertical_collocated_flag" ); } - WRITE_FLAG( pcSPS->getUseMTS() ? 1 : 0, "mts_enabled_flag" ); + WRITE_FLAG( pcSPS->getUseMTS() ? 1 : 0, "sps_mts_enabled_flag" ); if ( pcSPS->getUseMTS() ) { - WRITE_FLAG( pcSPS->getUseIntraMTS() ? 1 : 0, "mts_intra_enabled_flag" ); - WRITE_FLAG( pcSPS->getUseInterMTS() ? 1 : 0, "mts_inter_enabled_flag" ); + WRITE_FLAG( pcSPS->getUseIntraMTS() ? 1 : 0, "sps_explicit_mts_intra_enabled_flag" ); + WRITE_FLAG( pcSPS->getUseInterMTS() ? 1 : 0, "sps_explicit_mts_inter_enabled_flag" ); } - // KJS: sps_affine_enabled_flag - WRITE_FLAG( pcSPS->getUseAffine() ? 1 : 0, "affine_flag" ); + WRITE_FLAG( pcSPS->getUseSBT() ? 1 : 0, "sps_sbt_enabled_flag"); + WRITE_FLAG( pcSPS->getUseAffine() ? 1 : 0, "sps_affine_enabled_flag" ); if ( pcSPS->getUseAffine() ) { - WRITE_FLAG( pcSPS->getUseAffineType() ? 1 : 0, "affine_type_flag" ); + WRITE_FLAG( pcSPS->getUseAffineType() ? 1 : 0, "sps_affine_type_flag" ); + WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0, "sps_affine_amvr_enabled_flag" ); + WRITE_FLAG( pcSPS->getUsePROF() ? 1 : 0, "sps_affine_prof_enabled_flag" ); + if (pcSPS->getUsePROF()) + { + WRITE_FLAG(pcSPS->getProfControlPresentFlag() ? 1 : 0, "sps_prof_pic_present_flag" ); + } + } + if (pcSPS->getChromaFormatIdc() == CHROMA_444) + { + WRITE_FLAG(pcSPS->getUseColorTrans() ? 1 : 0, "sps_act_enabled_flag"); + } + if (pcSPS->getChromaFormatIdc() == CHROMA_444) + { + WRITE_FLAG(pcSPS->getPLTMode() ? 1 : 0, "sps_palette_enabled_flag" ); } - WRITE_FLAG( pcSPS->getUseGBi() ? 1 : 0, "gbi_flag" ); - WRITE_FLAG(pcSPS->getIBCFlag() ? 1 : 0, "ibc_flag"); + WRITE_FLAG( pcSPS->getUseBcw() ? 
1 : 0, "sps_bcw_enabled_flag" ); + WRITE_FLAG(pcSPS->getIBCFlag() ? 1 : 0, "sps_ibc_enabled_flag"); // KJS: sps_ciip_enabled_flag - WRITE_FLAG( pcSPS->getUseMHIntra() ? 1 : 0, "mhintra_flag" ); - - WRITE_FLAG( pcSPS->getUseTriangle() ? 1: 0, "triangle_flag" ); + WRITE_FLAG( pcSPS->getUseCiip() ? 1 : 0, "sps_ciip_enabled_flag" ); - // KJS: not in draft yet - WRITE_FLAG( pcSPS->getDisFracMmvdEnabledFlag() ? 1 : 0, "sps_fracmmvd_disabled_flag" ); - // KJS: not in draft yet - WRITE_FLAG( pcSPS->getUseSBT() ? 1 : 0, "sbt_enable_flag"); - if( pcSPS->getUseSBT() ) + if ( pcSPS->getUseMMVD() ) { - WRITE_FLAG(pcSPS->getMaxSbtSize() == 64 ? 1 : 0, "max_sbt_size_64_flag"); + WRITE_FLAG( pcSPS->getFpelMmvdEnabledFlag() ? 1 : 0, "sps_fpel_mmvd_enabled_flag" ); } - // KJS: not in draft yet - WRITE_FLAG(pcSPS->getUseReshaper() ? 1 : 0, "sps_reshaper_enable_flag"); + WRITE_FLAG( pcSPS->getUseTriangle() ? 1: 0, "sps_triangle_enabled_flag" ); + + WRITE_FLAG(pcSPS->getUseLmcs() ? 1 : 0, "sps_lmcs_enable_flag"); + WRITE_FLAG( pcSPS->getUseLFNST() ? 1 : 0, "sps_lfnst_enabled_flag" ); #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET WRITE_FLAG( pcSPS->getLadfEnabled() ? 1 : 0, "sps_ladf_enabled_flag" ); @@ -800,44 +973,43 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) #endif // KJS: reference picture sets to be replaced - const RPSList* rpsList = pcSPS->getRPSList(); - WRITE_UVLC(rpsList->getNumberOfReferencePictureSets(), "num_short_term_ref_pic_sets" ); - for(int i=0; i < rpsList->getNumberOfReferencePictureSets(); i++) - { - const ReferencePictureSet*rps = rpsList->getReferencePictureSet(i); - xCodeShortTermRefPicSet( rps,false, i); - } - WRITE_FLAG( pcSPS->getLongTermRefsPresent() ? 1 : 0, "long_term_ref_pics_present_flag" ); - if (pcSPS->getLongTermRefsPresent()) + + // KJS: remove scaling lists? + WRITE_FLAG( pcSPS->getScalingListFlag() ? 
1 : 0, "sps_scaling_list_enabled_flag" ); + + WRITE_FLAG( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag(), "sps_loop_filter_across_virtual_boundaries_disabled_present_flag" ); + if( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { - WRITE_UVLC(pcSPS->getNumLongTermRefPicSPS(), "num_long_term_ref_pics_sps" ); - for (uint32_t k = 0; k < pcSPS->getNumLongTermRefPicSPS(); k++) + WRITE_CODE( pcSPS->getNumVerVirtualBoundaries(), 2, "sps_num_ver_virtual_boundaries"); + for( unsigned i = 0; i < pcSPS->getNumVerVirtualBoundaries(); i++ ) { - WRITE_CODE( pcSPS->getLtRefPicPocLsbSps(k), pcSPS->getBitsForPOC(), "lt_ref_pic_poc_lsb_sps"); - WRITE_FLAG( pcSPS->getUsedByCurrPicLtSPSFlag(k), "used_by_curr_pic_lt_sps_flag[i]"); + WRITE_CODE((pcSPS->getVirtualBoundariesPosX(i)>>3), 13, "sps_virtual_boundaries_pos_x"); + } + WRITE_CODE(pcSPS->getNumHorVirtualBoundaries(), 2, "sps_num_hor_virtual_boundaries"); + for( unsigned i = 0; i < pcSPS->getNumHorVirtualBoundaries(); i++ ) + { + WRITE_CODE((pcSPS->getVirtualBoundariesPosY(i)>>3), 13, "sps_virtual_boundaries_pos_y"); } } -#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64 - WRITE_FLAG( pcSPS->getUseStrongIntraSmoothing(), "strong_intra_smoothing_enable_flag" ); - -#endif - - // KJS: remove scaling lists? -#if HEVC_USE_SCALING_LISTS - WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0, "scaling_list_enabled_flag" ); - if(pcSPS->getScalingListFlag()) + const TimingInfo *timingInfo = pcSPS->getTimingInfo(); + WRITE_FLAG(pcSPS->getHrdParametersPresentFlag(), "general_hrd_parameters_present_flag"); + if( pcSPS->getHrdParametersPresentFlag() ) { - WRITE_FLAG( pcSPS->getScalingListPresentFlag() ? 
1 : 0, "sps_scaling_list_data_present_flag" ); - if(pcSPS->getScalingListPresentFlag()) + WRITE_CODE(timingInfo->getNumUnitsInTick(), 32, "num_units_in_tick"); + WRITE_CODE(timingInfo->getTimeScale(), 32, "time_scale"); + WRITE_FLAG(pcSPS->getSubLayerParametersPresentFlag(), "sub_layer_cpb_parameters_present_flag"); + if (pcSPS->getSubLayerParametersPresentFlag()) { - codeScalingList( pcSPS->getScalingList() ); + codeHrdParameters(pcSPS->getHrdParameters(), 0, pcSPS->getMaxTLayers() - 1); + } + else + { + codeHrdParameters(pcSPS->getHrdParameters(), pcSPS->getMaxTLayers() - 1, pcSPS->getMaxTLayers() - 1); } } -#endif - // KJS: no VUI defined yet WRITE_FLAG( pcSPS->getVuiParametersPresentFlag(), "vui_parameters_present_flag" ); if (pcSPS->getVuiParametersPresentFlag()) { @@ -907,367 +1079,859 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) xWriteRbspTrailingBits(); } -#if HEVC_VPS -void HLSWriter::codeVPS( const VPS* pcVPS ) +void HLSWriter::codeDPS( const DPS* dps ) { #if ENABLE_TRACING - xTraceVPSHeader(); + xTraceDPSHeader(); #endif - WRITE_CODE( pcVPS->getVPSId(), 4, "vps_video_parameter_set_id" ); - WRITE_FLAG( 1, "vps_base_layer_internal_flag" ); - WRITE_FLAG( 1, "vps_base_layer_available_flag" ); - WRITE_CODE( 0, 6, "vps_max_layers_minus1" ); - WRITE_CODE( pcVPS->getMaxTLayers() - 1, 3, "vps_max_sub_layers_minus1" ); - WRITE_FLAG( pcVPS->getTemporalNestingFlag(), "vps_temporal_id_nesting_flag" ); - CHECK(pcVPS->getMaxTLayers()<=1&&!pcVPS->getTemporalNestingFlag(), "Invalud parameters"); - WRITE_CODE( 0xffff, 16, "vps_reserved_0xffff_16bits" ); - codePTL( pcVPS->getPTL(), true, pcVPS->getMaxTLayers() - 1 ); - const bool subLayerOrderingInfoPresentFlag = 1; - WRITE_FLAG(subLayerOrderingInfoPresentFlag, "vps_sub_layer_ordering_info_present_flag"); - for(uint32_t i=0; i <= pcVPS->getMaxTLayers()-1; i++) + WRITE_CODE( dps->getDecodingParameterSetId(), 4, "dps_decoding_parameter_set_id" ); + WRITE_CODE( dps->getMaxSubLayersMinus1(), 3, "dps_max_sub_layers_minus1" ); 
+ WRITE_CODE( 0, 5, "dps_reserved_zero_5bits" ); + uint32_t numPTLs = (uint32_t) dps->getNumPTLs(); + CHECK (numPTLs<1, "At least one PTL must be available in DPS"); + + WRITE_CODE( numPTLs - 1, 4, "dps_num_ptls_minus1" ); + + for (int i=0; i< numPTLs; i++) { - WRITE_UVLC( pcVPS->getMaxDecPicBuffering(i) - 1, "vps_max_dec_pic_buffering_minus1[i]" ); - WRITE_UVLC( pcVPS->getNumReorderPics(i), "vps_max_num_reorder_pics[i]" ); - WRITE_UVLC( pcVPS->getMaxLatencyIncrease(i), "vps_max_latency_increase_plus1[i]" ); - if (!subLayerOrderingInfoPresentFlag) - { - break; - } + ProfileTierLevel ptl = dps->getProfileTierLevel(i); + codeProfileTierLevel( &ptl, dps->getMaxSubLayersMinus1() ); } + WRITE_FLAG( 0, "dps_extension_flag" ); + xWriteRbspTrailingBits(); +} - CHECK( pcVPS->getNumHrdParameters() > MAX_VPS_NUM_HRD_PARAMETERS, "Too many HRD parameters" ); - CHECK( pcVPS->getMaxNuhReservedZeroLayerId() >= MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1, "Invalid parameters read" ); - WRITE_CODE( pcVPS->getMaxNuhReservedZeroLayerId(), 6, "vps_max_layer_id" ); - WRITE_UVLC( pcVPS->getMaxOpSets() - 1, "vps_num_layer_sets_minus1" ); - for( uint32_t opsIdx = 1; opsIdx <= ( pcVPS->getMaxOpSets() - 1 ); opsIdx ++ ) +void HLSWriter::codeVPS(const VPS* pcVPS) +{ +#if ENABLE_TRACING + xTraceVPSHeader(); +#endif + WRITE_CODE(pcVPS->getVPSId(), 4, "vps_video_parameter_set_id"); + WRITE_CODE(pcVPS->getMaxLayers() - 1, 6, "vps_max_layers_minus1"); + WRITE_CODE(pcVPS->getMaxSubLayers() - 1, 3, "vps_max_sublayers_minus1"); + if (pcVPS->getMaxLayers() > 1 && pcVPS->getMaxSubLayers() > 1) + { + WRITE_FLAG(pcVPS->getAllLayersSameNumSublayersFlag(), "vps_all_layers_same_num_sublayers_flag"); + } + if (pcVPS->getMaxLayers() > 1) { - // Operation point set - for( uint32_t i = 0; i <= pcVPS->getMaxNuhReservedZeroLayerId(); i ++ ) + WRITE_FLAG(pcVPS->getAllIndependentLayersFlag(), "vps_all_independent_layers_flag"); + } + for (uint32_t i = 0; i < pcVPS->getMaxLayers(); i++) + { + 
WRITE_CODE(pcVPS->getLayerId(i), 6, "vps_layer_id"); + if (i > 0 && !pcVPS->getAllIndependentLayersFlag()) { - // Only applicable for version 1 - // pcVPS->setLayerIdIncludedFlag( true, opsIdx, i ); - WRITE_FLAG( pcVPS->getLayerIdIncludedFlag( opsIdx, i ) ? 1 : 0, "layer_id_included_flag[opsIdx][i]" ); + WRITE_FLAG(pcVPS->getIndependentLayerFlag(i), "vps_independent_layer_flag"); + if (!pcVPS->getIndependentLayerFlag(i)) + { + for (int j = 0; j < i; j++) + { + WRITE_FLAG(pcVPS->getDirectRefLayerFlag(i, j), "vps_direct_dependency_flag"); + } + } } } - const TimingInfo *timingInfo = pcVPS->getTimingInfo(); - WRITE_FLAG(timingInfo->getTimingInfoPresentFlag(), "vps_timing_info_present_flag"); - if(timingInfo->getTimingInfoPresentFlag()) + if( pcVPS->getMaxLayers() > 1 ) { - WRITE_CODE(timingInfo->getNumUnitsInTick(), 32, "vps_num_units_in_tick"); - WRITE_CODE(timingInfo->getTimeScale(), 32, "vps_time_scale"); - WRITE_FLAG(timingInfo->getPocProportionalToTimingFlag(), "vps_poc_proportional_to_timing_flag"); - if(timingInfo->getPocProportionalToTimingFlag()) + if (pcVPS->getAllIndependentLayersFlag()) { - WRITE_UVLC(timingInfo->getNumTicksPocDiffOneMinus1(), "vps_num_ticks_poc_diff_one_minus1"); + WRITE_FLAG(pcVPS->getEachLayerIsAnOlsFlag(), "vps_each_layer_is_an_ols_flag"); } - WRITE_UVLC( pcVPS->getNumHrdParameters(), "vps_num_hrd_parameters" ); - - if( pcVPS->getNumHrdParameters() > 0 ) + if (!pcVPS->getEachLayerIsAnOlsFlag()) { - for( uint32_t i = 0; i < pcVPS->getNumHrdParameters(); i ++ ) + if (!pcVPS->getAllIndependentLayersFlag()) { + WRITE_CODE(pcVPS->getOlsModeIdc(), 2, "vps_ols_mode_idc"); + } + if (pcVPS->getOlsModeIdc() == 2) { - // Only applicable for version 1 - WRITE_UVLC( pcVPS->getHrdOpSetIdx( i ), "hrd_layer_set_idx" ); - if( i > 0 ) + WRITE_CODE(pcVPS->getNumOutputLayerSets() - 1, 8, "vps_num_output_layer_sets_minus1"); + for (uint32_t i = 1; i < pcVPS->getNumOutputLayerSets(); i++) { - WRITE_FLAG( pcVPS->getCprmsPresentFlag( i ) ? 
1 : 0, "cprms_present_flag[i]" ); + for (uint32_t j = 0; j < pcVPS->getMaxLayers(); j++) + { + WRITE_FLAG(pcVPS->getOlsOutputLayerFlag(i, j), "vps_ols_output_layer_flag"); + } } - codeHrdParameters(pcVPS->getHrdParameters(i), pcVPS->getCprmsPresentFlag( i ), pcVPS->getMaxTLayers() - 1); } } } - WRITE_FLAG( 0, "vps_extension_flag" ); + WRITE_FLAG(0, "vps_extension_flag"); //future extensions here.. xWriteRbspTrailingBits(); } -#endif -void HLSWriter::codeSliceHeader ( Slice* pcSlice ) +void HLSWriter::codePictureHeader( PicHeader* picHeader ) { + const PPS* pps = NULL; + const SPS* sps = NULL; + #if ENABLE_TRACING - xTraceSliceHeader (); + xTracePictureHeader (); #endif - CodingStructure& cs = *pcSlice->getPic()->cs; - const ChromaFormat format = pcSlice->getSPS()->getChromaFormatIdc(); - const uint32_t numberValidComponents = getNumberValidComponents(format); - const bool chromaEnabled = isChromaEnabled(format); + CodingStructure& cs = *picHeader->getPic()->cs; - //calculate number of bits required for slice address - int maxSliceSegmentAddress = cs.pcv->sizeInCtus; - int bitsSliceSegmentAddress = 0; - while(maxSliceSegmentAddress>(1<<bitsSliceSegmentAddress)) + WRITE_FLAG(picHeader->getNonReferencePictureFlag(), "non_reference_picture_flag"); + WRITE_FLAG(picHeader->getGdrPicFlag(), "gdr_pic_flag"); + WRITE_FLAG(picHeader->getNoOutputOfPriorPicsFlag(), "no_output_of_prior_pics_flag"); + if( picHeader->getGdrPicFlag() ) { - bitsSliceSegmentAddress++; + WRITE_UVLC(picHeader->getRecoveryPocCnt(), "recovery_poc_cnt"); } -#if HEVC_DEPENDENT_SLICES - const int ctuTsAddress = pcSlice->getSliceSegmentCurStartCtuTsAddr(); -#else - const int ctuTsAddress = pcSlice->getSliceCurStartCtuTsAddr(); -#endif - - //write slice address -#if HEVC_TILES_WPP - const int sliceSegmentRsAddress = pcSlice->getPic()->tileMap->getCtuTsToRsAddrMap(ctuTsAddress); -#else - const int sliceSegmentRsAddress = ctuTsAddress; -#endif - - WRITE_FLAG( sliceSegmentRsAddress==0, 
"first_slice_segment_in_pic_flag" ); - if ( pcSlice->getRapPicFlag() ) + else { - WRITE_FLAG( pcSlice->getNoOutputPriorPicsFlag() ? 1 : 0, "no_output_of_prior_pics_flag" ); + picHeader->setRecoveryPocCnt( 0 ); } - WRITE_UVLC( pcSlice->getPPS()->getPPSId(), "slice_pic_parameter_set_id" ); -#if HEVC_DEPENDENT_SLICES - if ( pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag() && (sliceSegmentRsAddress!=0) ) + + // parameter sets + WRITE_UVLC(picHeader->getPPSId(), "ph_pic_parameter_set_id"); + pps = cs.slice->getPPS(); + CHECK(pps==0, "Invalid PPS"); + sps = cs.slice->getSPS(); + CHECK(sps==0, "Invalid SPS"); + + // sub-picture IDs + if( sps->getSubPicIdPresentFlag() ) { - WRITE_FLAG( pcSlice->getDependentSliceSegmentFlag() ? 1 : 0, "dependent_slice_segment_flag" ); + if( sps->getSubPicIdSignallingPresentFlag() ) + { + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, sps->getSubPicId( picIdx ) ); + } + } + else + { + WRITE_FLAG(picHeader->getSubPicIdSignallingPresentFlag(), "ph_subpic_id_signalling_present_flag"); + if( picHeader->getSubPicIdSignallingPresentFlag() ) + { + WRITE_UVLC( picHeader->getSubPicIdLen() - 1, "ph_subpic_id_len_minus1" ); + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + WRITE_CODE(picHeader->getSubPicId(picIdx), picHeader->getSubPicIdLen( ), "ph_subpic_id[i]" ); + } + } + else + { + for( int picIdx = 0; picIdx < pps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, pps->getSubPicId( picIdx ) ); + } + } + } } -#endif - if(sliceSegmentRsAddress>0) + else { - WRITE_CODE( sliceSegmentRsAddress, bitsSliceSegmentAddress, "slice_segment_address" ); + for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ ) + { + picHeader->setSubPicId( picIdx, picIdx ); + } } -#if HEVC_DEPENDENT_SLICES - if( !pcSlice->getDependentSliceSegmentFlag() ) + + // virtual boundaries + if( !sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { -#endif - for( int i = 
0; i < pcSlice->getPPS()->getNumExtraSliceHeaderBits(); i++ ) + WRITE_FLAG( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag(), "ph_loop_filter_across_virtual_boundaries_disabled_present_flag" ); + if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ) { - WRITE_FLAG( 0, "slice_reserved_flag[]" ); + WRITE_CODE(picHeader->getNumVerVirtualBoundaries(), 2, "ph_num_ver_virtual_boundaries"); + for( unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ ) + { + WRITE_CODE(picHeader->getVirtualBoundariesPosX(i) >> 3, 13, "ph_virtual_boundaries_pos_x"); + } + WRITE_CODE(picHeader->getNumHorVirtualBoundaries(), 2, "ph_num_hor_virtual_boundaries"); + for( unsigned i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ ) + { + WRITE_CODE(picHeader->getVirtualBoundariesPosY(i)>>3, 13, "ph_virtual_boundaries_pos_y"); + } } - - WRITE_UVLC( pcSlice->getSliceType(), "slice_type" ); - - if( pcSlice->getPPS()->getOutputFlagPresentFlag() ) + else { - WRITE_FLAG( pcSlice->getPicOutputFlag() ? 
1 : 0, "pic_output_flag" ); + picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( 0 ); + picHeader->setNumVerVirtualBoundaries( 0 ); + picHeader->setNumHorVirtualBoundaries( 0 ); } - - int pocBits = pcSlice->getSPS()->getBitsForPOC(); - int pocMask = (1 << pocBits) - 1; - WRITE_CODE(pcSlice->getPOC() & pocMask, pocBits, "slice_pic_order_cnt_lsb"); - if( !pcSlice->getIdrPicFlag() ) + } + else + { + picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() ); + picHeader->setNumVerVirtualBoundaries( sps->getNumVerVirtualBoundaries() ); + picHeader->setNumHorVirtualBoundaries( sps->getNumHorVirtualBoundaries() ); + for( unsigned i = 0; i < 3; i++ ) { - const ReferencePictureSet* rps = pcSlice->getRPS(); + picHeader->setVirtualBoundariesPosX( sps->getVirtualBoundariesPosX(i), i ); + picHeader->setVirtualBoundariesPosY( sps->getVirtualBoundariesPosY(i), i ); + } + } + + // 4:4:4 colour plane ID + if( sps->getSeparateColourPlaneFlag() ) + { + WRITE_CODE( picHeader->getColourPlaneId(), 2, "colour_plane_id" ); + } + else + { + picHeader->setColourPlaneId( 0 ); + } + + // picture output flag + if( pps->getOutputFlagPresentFlag() ) + { + WRITE_FLAG( picHeader->getPicOutputFlag(), "pic_output_flag" ); + } + else + { + picHeader->setPicOutputFlag(true); + } - // check for bitstream restriction stating that: - // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0. 
- // Ideally this process should not be repeated for each slice in a picture - if( pcSlice->isIRAP() ) + // reference picture lists + WRITE_FLAG( picHeader->getPicRplPresentFlag(), "pic_rpl_present_flag" ); + if( picHeader->getPicRplPresentFlag() ) + { + // List0 and List1 + for(int listIdx = 0; listIdx < 2; listIdx++) + { + // copy L1 index from L0 index + if (listIdx == 1 && !pps->getRpl1IdxPresentFlag()) { - for( int picIdx = 0; picIdx < rps->getNumberOfPictures(); picIdx++ ) + picHeader->setRPL1idx(picHeader->getRPL0idx()); + } + // RPL in picture header or SPS + else if (sps->getNumRPL(listIdx) > 0) + { + if (!pps->getPPSRefPicListSPSIdc(listIdx)) + { + WRITE_FLAG(picHeader->getRPLIdx(listIdx) != -1 ? 1 : 0, "pic_rpl_sps_flag[i]"); + } + else if (pps->getPPSRefPicListSPSIdc( listIdx ) == 1) { - CHECK( rps->getUsed( picIdx ), "Picture should not be used" ); + picHeader->setRPLIdx( listIdx, -1); } } - - if( pcSlice->getRPSidx() < 0 ) + else { - WRITE_FLAG( 0, "short_term_ref_pic_set_sps_flag" ); - xCodeShortTermRefPicSet( rps, true, pcSlice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() ); + picHeader->setRPLIdx( listIdx, -1 ); } - else + + // use list from SPS + if (picHeader->getRPLIdx(listIdx) != -1) { - WRITE_FLAG( 1, "short_term_ref_pic_set_sps_flag" ); - int numBits = 0; - while( ( 1 << numBits ) < pcSlice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() ) + if (listIdx == 1 && !pps->getRpl1IdxPresentFlag()) { - numBits++; } - if( numBits > 0 ) + else if (sps->getNumRPL( listIdx ) > 1) { - WRITE_CODE( pcSlice->getRPSidx(), numBits, "short_term_ref_pic_set_idx" ); + int numBits = ceilLog2(sps->getNumRPL( listIdx )); + WRITE_CODE(picHeader->getRPLIdx(listIdx), numBits, "pic_rpl_idx[i]"); } + else + { + picHeader->setRPLIdx( listIdx, 0 ); + } + picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(picHeader->getRPLIdx(listIdx))); + } + // explicit RPL in picture header + else + { + xCodeRefPicList( 
picHeader->getRPL(listIdx), sps->getLongTermRefsPresent(), sps->getBitsForPOC(), !sps->getUseWP() && !sps->getUseWPBiPred() ); } - if( pcSlice->getSPS()->getLongTermRefsPresent() ) + + // POC MSB cycle signalling for LTRP + if (picHeader->getRPL(listIdx)->getNumberOfLongtermPictures()) { - int numLtrpInSH = rps->getNumberOfLongtermPictures(); - int ltrpInSPS[MAX_NUM_REF_PICS]; - int numLtrpInSPS = 0; - uint32_t ltrpIndex; - int counter = 0; - // WARNING: The following code only works only if a matching long-term RPS is - // found in the SPS for ALL long-term pictures - // The problem is that the SPS coded long-term pictures are moved to the - // beginning of the list which causes a mismatch when no reference picture - // list reordering is used - // NB: Long-term coding is currently not supported in general by the HM encoder - for( int k = rps->getNumberOfPictures() - 1; k > rps->getNumberOfPictures() - rps->getNumberOfLongtermPictures() - 1; k-- ) + for (int i = 0; i < picHeader->getRPL(listIdx)->getNumberOfLongtermPictures() + picHeader->getRPL(listIdx)->getNumberOfShorttermPictures(); i++) { - if( xFindMatchingLTRP( pcSlice, <rpIndex, rps->getPOC( k ), rps->getUsed( k ) ) ) - { - ltrpInSPS[numLtrpInSPS] = ltrpIndex; - numLtrpInSPS++; - } - else + if (picHeader->getRPL(listIdx)->isRefPicLongterm(i)) { - counter++; + if (picHeader->getRPL(listIdx)->getLtrpInSliceHeaderFlag()) + { + WRITE_CODE(picHeader->getRPL(listIdx)->getRefPicIdentifier(i), sps->getBitsForPOC(), + "pic_poc_lsb_lt[listIdx][rplsIdx][j]"); + } + WRITE_FLAG(picHeader->getLocalRPL(listIdx)->getDeltaPocMSBPresentFlag(i) ? 
1 : 0, "pic_delta_poc_msb_present_flag[i][j]"); + if (picHeader->getLocalRPL(listIdx)->getDeltaPocMSBPresentFlag(i)) + { + WRITE_UVLC(picHeader->getLocalRPL(listIdx)->getDeltaPocMSBCycleLT(i), "pic_delta_poc_msb_cycle_lt[i][j]"); + } } } - numLtrpInSH -= numLtrpInSPS; - // check that either all long-term pictures are coded in SPS or in slice header (no mixing) - CHECK( numLtrpInSH != 0 && numLtrpInSPS != 0, "Long term picture not coded" ); + } + } + } + + // partitioning constraint overrides + if (sps->getSplitConsOverrideEnabledFlag()) + { + WRITE_FLAG(picHeader->getSplitConsOverrideFlag(), "partition_constraints_override_flag"); + if (picHeader->getSplitConsOverrideFlag()) + { + WRITE_UVLC(floorLog2(picHeader->getMinQTSize(I_SLICE)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_intra_slice_luma"); + WRITE_UVLC(floorLog2(picHeader->getMinQTSize(P_SLICE)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_inter_slice"); + WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(P_SLICE), "pic_max_mtt_hierarchy_depth_inter_slice"); + WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(I_SLICE), "pic_max_mtt_hierarchy_depth_intra_slice_luma"); + if (picHeader->getMaxMTTHierarchyDepth(I_SLICE) != 0) + { + WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(I_SLICE)) - floorLog2(picHeader->getMinQTSize(I_SLICE)), "pic_log2_diff_max_bt_min_qt_intra_slice_luma"); + WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(I_SLICE)) - floorLog2(picHeader->getMinQTSize(I_SLICE)), "pic_log2_diff_max_tt_min_qt_intra_slice_luma"); + } + if (picHeader->getMaxMTTHierarchyDepth(P_SLICE) != 0) + { + WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(P_SLICE)) - floorLog2(picHeader->getMinQTSize(P_SLICE)), "pic_log2_diff_max_bt_min_qt_inter_slice"); + WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(P_SLICE)) - floorLog2(picHeader->getMinQTSize(P_SLICE)), "pic_log2_diff_max_tt_min_qt_inter_slice"); + } + if (sps->getUseDualITree()) + { + WRITE_UVLC(floorLog2(picHeader->getMinQTSize(I_SLICE, 
CHANNEL_TYPE_CHROMA)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_intra_slice_chroma"); + WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(I_SLICE, CHANNEL_TYPE_CHROMA), "pic_max_mtt_hierarchy_depth_intra_slice_chroma"); + if (picHeader->getMaxMTTHierarchyDepth(I_SLICE, CHANNEL_TYPE_CHROMA) != 0) + { + WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - floorLog2(picHeader->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "pic_log2_diff_max_bt_min_qt_intra_slice_chroma"); + WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - floorLog2(picHeader->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "pic_log2_diff_max_tt_min_qt_intra_slice_chroma"); + } + } + } + } + else + { + picHeader->setSplitConsOverrideFlag(0); + } + + // inherit constraint values from SPS + if (!sps->getSplitConsOverrideEnabledFlag() || !picHeader->getSplitConsOverrideFlag()) + { + picHeader->setMinQTSizes(sps->getMinQTSizes()); + picHeader->setMaxMTTHierarchyDepths(sps->getMaxMTTHierarchyDepths()); + picHeader->setMaxBTSizes(sps->getMaxBTSizes()); + picHeader->setMaxTTSizes(sps->getMaxTTSizes()); + } + + // delta quantization and chrom and chroma offset + if (pps->getUseDQP()) + { + WRITE_UVLC( picHeader->getCuQpDeltaSubdivIntra(), "pic_cu_qp_delta_subdiv_intra_slice" ); + WRITE_UVLC( picHeader->getCuQpDeltaSubdivInter(), "pic_cu_qp_delta_subdiv_inter_slice" ); + } + else + { + picHeader->setCuQpDeltaSubdivIntra( 0 ); + picHeader->setCuQpDeltaSubdivInter( 0 ); + } + if (pps->getCuChromaQpOffsetEnabledFlag()) + { + WRITE_UVLC( picHeader->getCuChromaQpOffsetSubdivIntra(), "pic_cu_chroma_qp_offset_subdiv_intra_slice" ); + WRITE_UVLC( picHeader->getCuChromaQpOffsetSubdivInter(), "pic_cu_chroma_qp_offset_subdiv_inter_slice" ); + } + else + { + picHeader->setCuChromaQpOffsetSubdivIntra( 0 ); + picHeader->setCuChromaQpOffsetSubdivInter( 0 ); + } + + // temporal motion vector prediction + if (sps->getSPSTemporalMVPEnabledFlag()) + { + 
WRITE_FLAG( picHeader->getEnableTMVPFlag(), "pic_temporal_mvp_enabled_flag" ); + } + else + { + picHeader->setEnableTMVPFlag(false); + } + + // mvd L1 zero flag + if (!pps->getPPSMvdL1ZeroIdc()) + { + WRITE_FLAG(picHeader->getMvdL1ZeroFlag(), "pic_mvd_l1_zero_flag"); + } + else + { + picHeader->setMvdL1ZeroFlag( pps->getPPSMvdL1ZeroIdc() - 1 ); + } + + // merge candidate list size + if (!pps->getPPSSixMinusMaxNumMergeCandPlus1()) + { + CHECK(picHeader->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported"); + WRITE_UVLC(MRG_MAX_NUM_CANDS - picHeader->getMaxNumMergeCand(), "pic_six_minus_max_num_merge_cand"); + } + else + { + picHeader->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - (pps->getPPSSixMinusMaxNumMergeCandPlus1() - 1)); + } + + // subblock merge candidate list size + if ( sps->getUseAffine() ) + { + CHECK( picHeader->getMaxNumAffineMergeCand() > AFFINE_MRG_MAX_NUM_CANDS, "More affine merge candidates signalled than supported" ); + WRITE_UVLC(AFFINE_MRG_MAX_NUM_CANDS - picHeader->getMaxNumAffineMergeCand(), "pic_five_minus_max_num_subblock_merge_cand"); + } + else + { + picHeader->setMaxNumAffineMergeCand( sps->getSBTMVPEnabledFlag() && picHeader->getEnableTMVPFlag() ); + } - int bitsForLtrpInSPS = 0; - while( pcSlice->getSPS()->getNumLongTermRefPicSPS() > ( 1 << bitsForLtrpInSPS ) ) + // full-pel MMVD flag + if (sps->getFpelMmvdEnabledFlag()) + { + WRITE_FLAG( picHeader->getDisFracMMVD(), "pic_fpel_mmvd_enabled_flag" ); + } + else + { + picHeader->setDisFracMMVD(false); + } + + // picture level BDOF disable flags + if (sps->getBdofControlPresentFlag()) + { + WRITE_FLAG(picHeader->getDisBdofFlag(), "pic_disable_bdof_flag"); + } + else + { + picHeader->setDisBdofFlag(0); + } + + // picture level DMVR disable flags + if (sps->getDmvrControlPresentFlag()) + { + WRITE_FLAG(picHeader->getDisDmvrFlag(), "pic_disable_dmvr_flag"); + } + else + { + picHeader->setDisDmvrFlag(0); + } + + // picture level PROF disable flags + if 
(sps->getProfControlPresentFlag()) + { + WRITE_FLAG(picHeader->getDisProfFlag(), "pic_disable_prof_flag"); + } + else + { + picHeader->setDisProfFlag(0); + } + + // triangle merge candidate list size + if (sps->getUseTriangle() && picHeader->getMaxNumMergeCand() >= 2) + { + if (!pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1()) + { + CHECK(picHeader->getMaxNumMergeCand() < picHeader->getMaxNumTriangleCand(), "Incorrrect max number of triangle candidates!"); + WRITE_UVLC(picHeader->getMaxNumMergeCand() - picHeader->getMaxNumTriangleCand(), "pic_max_num_merge_cand_minus_max_num_triangle_cand"); + } + else + { + picHeader->setMaxNumTriangleCand((uint32_t)(picHeader->getMaxNumMergeCand() - (pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() - 1))); + } + } + + // ibc merge candidate list size + if (sps->getIBCFlag()) + { + CHECK( picHeader->getMaxNumIBCMergeCand() > IBC_MRG_MAX_NUM_CANDS, "More IBC merge candidates signalled than supported" ); + WRITE_UVLC(IBC_MRG_MAX_NUM_CANDS - picHeader->getMaxNumIBCMergeCand(), "pic_six_minus_max_num_ibc_merge_cand"); + } + + // joint Cb/Cr sign flag + if (sps->getJointCbCrEnabledFlag()) + { + WRITE_FLAG( picHeader->getJointCbCrSignFlag(), "pic_joint_cbcr_sign_flag" ); + } + else + { + picHeader->setJointCbCrSignFlag(false); + } + + // sao enable flags + if(sps->getSAOEnabledFlag()) + { + WRITE_FLAG(picHeader->getSaoEnabledPresentFlag(), "pic_sao_enabled_present_flag"); + if (picHeader->getSaoEnabledPresentFlag()) + { + WRITE_FLAG(picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA), "slice_sao_luma_flag"); + if (sps->getChromaFormatIdc() != CHROMA_400) + { + WRITE_FLAG(picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA), "slice_sao_chroma_flag"); + } + } + else + { + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, true); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, true); + } + } + else + { + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, false); + picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false); + } + + // 
alf enable flags and aps IDs + if( sps->getALFEnabledFlag() ) + { + WRITE_FLAG(picHeader->getAlfEnabledPresentFlag(), "pic_alf_enabled_present_flag"); + if (picHeader->getAlfEnabledPresentFlag()) + { + WRITE_FLAG(picHeader->getAlfEnabledFlag(COMPONENT_Y), "pic_alf_enabled_flag"); + if (picHeader->getAlfEnabledFlag(COMPONENT_Y)) + { + WRITE_CODE(picHeader->getNumAlfAps(), 3, "pic_num_alf_aps_ids_luma"); + const std::vector<int>& apsId = picHeader->getAlfAPSs(); + for (int i = 0; i < picHeader->getNumAlfAps(); i++) { - bitsForLtrpInSPS++; + WRITE_CODE(apsId[i], 3, "pic_alf_aps_id_luma"); } - if( pcSlice->getSPS()->getNumLongTermRefPicSPS() > 0 ) + + const int alfChromaIdc = picHeader->getAlfEnabledFlag(COMPONENT_Cb) + picHeader->getAlfEnabledFlag(COMPONENT_Cr) * 2 ; + if (sps->getChromaFormatIdc() != CHROMA_400) { - WRITE_UVLC( numLtrpInSPS, "num_long_term_sps" ); + WRITE_CODE(alfChromaIdc, 2, "pic_alf_chroma_idc"); } - WRITE_UVLC( numLtrpInSH, "num_long_term_pics" ); - // Note that the LSBs of the LT ref. pic. POCs must be sorted before. 
- // Not sorted here because LT ref indices will be used in setRefPicList() - int prevDeltaMSB = 0, prevLSB = 0; - int offset = rps->getNumberOfNegativePictures() + rps->getNumberOfPositivePictures(); - counter = 0; - // Warning: If some pictures are moved to ltrpInSPS, i is referring to a wrong index - // (mapping would be required) - for( int i = rps->getNumberOfPictures() - 1; i > offset - 1; i--, counter++ ) + if (alfChromaIdc) { - if( counter < numLtrpInSPS ) - { - if( bitsForLtrpInSPS > 0 ) - { - WRITE_CODE( ltrpInSPS[counter], bitsForLtrpInSPS, "lt_idx_sps[i]" ); - } - } - else - { - WRITE_CODE( rps->getPocLSBLT( i ), pcSlice->getSPS()->getBitsForPOC(), "poc_lsb_lt" ); - WRITE_FLAG( rps->getUsed( i ), "used_by_curr_pic_lt_flag" ); - } - WRITE_FLAG( rps->getDeltaPocMSBPresentFlag( i ), "delta_poc_msb_present_flag" ); - - if( rps->getDeltaPocMSBPresentFlag( i ) ) - { - bool deltaFlag = false; - // First LTRP from SPS || First LTRP from SH || curr LSB != prev LSB - if( ( i == rps->getNumberOfPictures() - 1 ) || ( i == rps->getNumberOfPictures() - 1 - numLtrpInSPS ) || ( rps->getPocLSBLT( i ) != prevLSB ) ) - { - deltaFlag = true; - } - if( deltaFlag ) - { - WRITE_UVLC( rps->getDeltaPocMSBCycleLT( i ), "delta_poc_msb_cycle_lt[i]" ); - } - else - { - int differenceInDeltaMSB = rps->getDeltaPocMSBCycleLT( i ) - prevDeltaMSB; - CHECK( differenceInDeltaMSB < 0, "Negative diff. 
delta MSB" ); - WRITE_UVLC( differenceInDeltaMSB, "delta_poc_msb_cycle_lt[i]" ); - } - prevLSB = rps->getPocLSBLT( i ); - prevDeltaMSB = rps->getDeltaPocMSBCycleLT( i ); - } + WRITE_CODE(picHeader->getAlfApsIdChroma(), 3, "pic_alf_aps_id_chroma"); } } - if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag() ) + } + else + { + picHeader->setAlfEnabledFlag(COMPONENT_Y, true); + picHeader->setAlfEnabledFlag(COMPONENT_Cb, true); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, true); + } + } + else + { + picHeader->setAlfEnabledFlag(COMPONENT_Y, false); + picHeader->setAlfEnabledFlag(COMPONENT_Cb, false); + picHeader->setAlfEnabledFlag(COMPONENT_Cr, false); + } + + // dependent quantization + if (!pps->getPPSDepQuantEnabledIdc()) + { + WRITE_FLAG(picHeader->getDepQuantEnabledFlag(), "pic_dep_quant_enabled_flag"); + } + else + { + picHeader->setDepQuantEnabledFlag( pps->getPPSDepQuantEnabledIdc() - 1 ); + } + + // sign data hiding + if( !picHeader->getDepQuantEnabledFlag() ) + { + WRITE_FLAG( picHeader->getSignDataHidingEnabledFlag(), "pic_sign_data_hiding_enabled_flag" ); + } + else + { + picHeader->setSignDataHidingEnabledFlag(false); + } + + // deblocking filter controls + if (pps->getDeblockingFilterControlPresentFlag()) + { + if(pps->getDeblockingFilterOverrideEnabledFlag()) + { + WRITE_FLAG ( picHeader->getDeblockingFilterOverridePresentFlag(), "pic_deblocking_filter_override_present_flag" ); + if( picHeader->getDeblockingFilterOverridePresentFlag() ) { - WRITE_FLAG( pcSlice->getEnableTMVPFlag() ? 
1 : 0, "slice_temporal_mvp_enabled_flag" ); + WRITE_FLAG ( picHeader->getDeblockingFilterOverrideFlag(), "pic_deblocking_filter_override_flag" ); } + else + { + picHeader->setDeblockingFilterOverrideFlag(false); + } + } + else + { + picHeader->setDeblockingFilterOverridePresentFlag(false); + picHeader->setDeblockingFilterOverrideFlag(false); } - if( pcSlice->getSPS()->getSAOEnabledFlag() ) + + if(picHeader->getDeblockingFilterOverrideFlag()) { - WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" ); - if( chromaEnabled ) + WRITE_FLAG ( picHeader->getDeblockingFilterDisable(), "pic_deblocking_filter_disabled_flag" ); + if(!picHeader->getDeblockingFilterDisable()) { - WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_CHROMA ), "slice_sao_chroma_flag" ); + WRITE_SVLC( picHeader->getDeblockingFilterBetaOffsetDiv2(), "pic_beta_offset_div2" ); + WRITE_SVLC( picHeader->getDeblockingFilterTcOffsetDiv2(), "pic_tc_offset_div2" ); } } + else + { + picHeader->setDeblockingFilterDisable ( pps->getPPSDeblockingFilterDisabledFlag() ); + picHeader->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() ); + picHeader->setDeblockingFilterTcOffsetDiv2 ( pps->getDeblockingFilterTcOffsetDiv2() ); + } + } + else + { + picHeader->setDeblockingFilterDisable ( false ); + picHeader->setDeblockingFilterBetaOffsetDiv2( 0 ); + picHeader->setDeblockingFilterTcOffsetDiv2 ( 0 ); + } - if( pcSlice->getSPS()->getALFEnabledFlag() ) + // luma mapping / chroma scaling controls + if (sps->getUseLmcs()) + { + WRITE_FLAG(picHeader->getLmcsEnabledFlag(), "pic_lmcs_enabled_flag"); + if (picHeader->getLmcsEnabledFlag()) { - const int alfEnabled = pcSlice->getAPS()->getAlfAPSParam().enabledFlag[COMPONENT_Y] ? 
1 : 0; - WRITE_FLAG( alfEnabled, "tile_group_alf_enabled_flag"); - if (alfEnabled) + WRITE_CODE(picHeader->getLmcsAPSId(), 2, "pic_lmcs_aps_id"); + if (sps->getChromaFormatIdc() != CHROMA_400) { - WRITE_CODE(pcSlice->getAPSId(), 5, "tile_group_aps_id"); + WRITE_FLAG(picHeader->getLmcsChromaResidualScaleFlag(), "pic_chroma_residual_scale_flag"); } + else + { + picHeader->setLmcsChromaResidualScaleFlag(false); + } + } + } + else + { + picHeader->setLmcsEnabledFlag(false); + picHeader->setLmcsChromaResidualScaleFlag(false); + } + + // quantization scaling lists + if( sps->getScalingListFlag() ) + { + WRITE_FLAG( picHeader->getScalingListPresentFlag(), "pic_scaling_list_present_flag" ); + if( picHeader->getScalingListPresentFlag() ) + { + WRITE_CODE( picHeader->getScalingListAPSId(), 3, "pic_scaling_list_aps_id" ); + } + } + else + { + picHeader->setScalingListPresentFlag( false ); + } + + // picture header extension + if(pps->getPictureHeaderExtensionPresentFlag()) + { + WRITE_UVLC(0,"pic_segment_header_extension_length"); + } + + xWriteRbspTrailingBits(); +} + +void HLSWriter::codeSliceHeader ( Slice* pcSlice ) +{ +#if ENABLE_TRACING + xTraceSliceHeader (); +#endif + + CodingStructure& cs = *pcSlice->getPic()->cs; + const PicHeader *picHeader = cs.picHeader; + const ChromaFormat format = pcSlice->getSPS()->getChromaFormatIdc(); + const uint32_t numberValidComponents = getNumberValidComponents(format); + const bool chromaEnabled = isChromaEnabled(format); + + int pocBits = pcSlice->getSPS()->getBitsForPOC(); + int pocMask = (1 << pocBits) - 1; + WRITE_CODE(pcSlice->getPOC() & pocMask, pocBits, "slice_pic_order_cnt_lsb"); + + + if (pcSlice->getSPS()->getSubPicPresentFlag()) + { + uint32_t bitsSubPicId; + if (pcSlice->getSPS()->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = pcSlice->getSPS()->getSubPicIdLen(); } + else if (picHeader->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = picHeader->getSubPicIdLen(); + } + else if 
(pcSlice->getPPS()->getSubPicIdSignallingPresentFlag()) + { + bitsSubPicId = pcSlice->getPPS()->getSubPicIdLen(); + } + else + { + bitsSubPicId = ceilLog2(pcSlice->getSPS()->getNumSubPics()); + } + WRITE_CODE(pcSlice->getSliceSubPicId(), bitsSubPicId, "slice_subpic_id"); + } + + // raster scan slices + if( pcSlice->getPPS()->getRectSliceFlag() == 0 ) + { + // slice address is the raster scan tile index of first tile in slice + if( pcSlice->getPPS()->getNumTiles() > 1 ) + { + int bitsSliceAddress = ceilLog2(pcSlice->getPPS()->getNumTiles()); + WRITE_CODE( pcSlice->getSliceID(), bitsSliceAddress, "slice_address"); + WRITE_UVLC( pcSlice->getNumTilesInSlice() - 1, "num_tiles_in_slice_minus1"); + } + } + // rectangular slices + else + { + // slice address is the index of the slice within the current sub-picture + if( pcSlice->getPPS()->getNumSlicesInPic() > 1 ) + { + int bitsSliceAddress = ceilLog2(pcSlice->getPPS()->getNumSlicesInPic()); // change to NumSlicesInSubPic when available + WRITE_CODE( pcSlice->getSliceID(), bitsSliceAddress, "slice_address"); + } + } + + + WRITE_UVLC( pcSlice->getSliceType(), "slice_type" ); + - //check if numrefidxes match the defaults. If not, override - if( !pcSlice->isIntra() ) + if( !picHeader->getPicRplPresentFlag() && (!pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent()) ) { - bool overrideFlag = ( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) != pcSlice->getPPS()->getNumRefIdxL0DefaultActive() || ( pcSlice->isInterB() && pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) != pcSlice->getPPS()->getNumRefIdxL1DefaultActive() ) ); - WRITE_FLAG( overrideFlag ? 
1 : 0, "num_ref_idx_active_override_flag" ); - if( overrideFlag ) + //Write L0 related syntax elements + if (pcSlice->getSPS()->getNumRPL0() > 0) { - WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) - 1, "num_ref_idx_l0_active_minus1" ); - if( pcSlice->isInterB() ) + if (!pcSlice->getPPS()->getPPSRefPicListSPSIdc0()) { - WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) - 1, "num_ref_idx_l1_active_minus1" ); + WRITE_FLAG(pcSlice->getRPL0idx() != -1 ? 1 : 0, "ref_pic_list_sps_flag[0]"); } - else + } + if (pcSlice->getRPL0idx() != -1) + { + if (pcSlice->getSPS()->getNumRPL0() > 1) { - pcSlice->setNumRefIdx( REF_PIC_LIST_1, 0 ); + int numBits = 0; + while ((1 << numBits) < pcSlice->getSPS()->getNumRPL0()) + { + numBits++; + } + WRITE_CODE(pcSlice->getRPL0idx(), numBits, "ref_pic_list_idx[0]"); } } - } - else - { - pcSlice->setNumRefIdx( REF_PIC_LIST_0, 0 ); - pcSlice->setNumRefIdx( REF_PIC_LIST_1, 0 ); - } - - if( pcSlice->getPPS()->getListsModificationPresentFlag() && pcSlice->getNumRpsCurrTempList() > 1 ) - { - RefPicListModification* refPicListModification = pcSlice->getRefPicListModification(); - if( !pcSlice->isIntra() ) + else + { //write local RPL0 + xCodeRefPicList( pcSlice->getRPL0(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() ); + } + //Deal POC Msb cycle signalling for LTRP + if (pcSlice->getRPL0()->getNumberOfLongtermPictures()) { - WRITE_FLAG( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL0() ? 
1 : 0, "ref_pic_list_modification_flag_l0" ); - if( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL0() ) + for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++) { - int numRpsCurrTempList0 = pcSlice->getNumRpsCurrTempList(); - if( numRpsCurrTempList0 > 1 ) + if (pcSlice->getRPL0()->isRefPicLongterm(i)) { - int length = 1; - numRpsCurrTempList0--; - while( numRpsCurrTempList0 >>= 1 ) + if (pcSlice->getRPL0()->getLtrpInSliceHeaderFlag()) + { + WRITE_CODE(pcSlice->getRPL0()->getRefPicIdentifier(i), pcSlice->getSPS()->getBitsForPOC(), + "slice_poc_lsb_lt[listIdx][rplsIdx][j]"); + } + WRITE_FLAG(pcSlice->getLocalRPL0()->getDeltaPocMSBPresentFlag(i) ? 1 : 0, "delta_poc_msb_present_flag[i][j]"); + if (pcSlice->getLocalRPL0()->getDeltaPocMSBPresentFlag(i)) { - length++; + WRITE_UVLC(pcSlice->getLocalRPL0()->getDeltaPocMSBCycleLT(i), "delta_poc_msb_cycle_lt[i][j]"); } - for( int i = 0; i < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); i++ ) + } + } + } + + //Write L1 related syntax elements + if (!pcSlice->getPPS()->getRpl1IdxPresentFlag()) + { + CHECK(pcSlice->getRPL1idx() != pcSlice->getRPL0idx(), "RPL1Idx is not signalled but it is not the same as RPL0Idx"); + if (pcSlice->getRPL1idx() == -1) + { //write local RPL1 + xCodeRefPicList( pcSlice->getRPL1(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() ); + } + } + else + { + if (pcSlice->getSPS()->getNumRPL1() > 0) + { + if (!pcSlice->getPPS()->getPPSRefPicListSPSIdc1()) + { + WRITE_FLAG(pcSlice->getRPL1idx() != -1 ? 
1 : 0, "ref_pic_list_sps_flag[1]"); + } + } + if (pcSlice->getRPL1idx() != -1) + { + if (pcSlice->getSPS()->getNumRPL1() > 1) + { + int numBits = 0; + while ((1 << numBits) < pcSlice->getSPS()->getNumRPL1()) { - WRITE_CODE( refPicListModification->getRefPicSetIdxL0( i ), length, "list_entry_l0" ); + numBits++; } + WRITE_CODE(pcSlice->getRPL1idx(), numBits, "ref_pic_list_idx[1]"); } } + else + { //write local RPL1 + xCodeRefPicList( pcSlice->getRPL1(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() ); + } } - if( pcSlice->isInterB() ) + //Deal POC Msb cycle signalling for LTRP + if (pcSlice->getRPL1()->getNumberOfLongtermPictures()) { - WRITE_FLAG( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL1() ? 1 : 0, "ref_pic_list_modification_flag_l1" ); - if( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL1() ) + for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++) { - int numRpsCurrTempList1 = pcSlice->getNumRpsCurrTempList(); - if( numRpsCurrTempList1 > 1 ) + if (pcSlice->getRPL1()->isRefPicLongterm(i)) { - int length = 1; - numRpsCurrTempList1--; - while( numRpsCurrTempList1 >>= 1 ) + if (pcSlice->getRPL1()->getLtrpInSliceHeaderFlag()) { - length++; + WRITE_CODE(pcSlice->getRPL1()->getRefPicIdentifier(i), pcSlice->getSPS()->getBitsForPOC(), + "slice_poc_lsb_lt[listIdx][rplsIdx][j]"); } - for( int i = 0; i < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); i++ ) + WRITE_FLAG(pcSlice->getLocalRPL1()->getDeltaPocMSBPresentFlag(i) ? 
1 : 0, "delta_poc_msb_present_flag[i][j]"); + if (pcSlice->getLocalRPL1()->getDeltaPocMSBPresentFlag(i)) { - WRITE_CODE( refPicListModification->getRefPicSetIdxL1( i ), length, "list_entry_l1" ); + WRITE_UVLC(pcSlice->getLocalRPL1()->getDeltaPocMSBCycleLT(i), "delta_poc_msb_cycle_lt[i][j]"); } } } } } - if( pcSlice->isInterB() ) + if( picHeader->getPicRplPresentFlag() || !pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent() ) { - WRITE_FLAG( pcSlice->getMvdL1ZeroFlag() ? 1 : 0, "mvd_l1_zero_flag" ); + //check if numrefidxes match the defaults. If not, override + + if ((!pcSlice->isIntra() && pcSlice->getRPL0()->getNumRefEntries() > 1) || + (pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1) ) + { + int defaultL0 = std::min<int>(pcSlice->getRPL0()->getNumRefEntries(), pcSlice->getPPS()->getNumRefIdxL0DefaultActive()); + int defaultL1 = pcSlice->isInterB() ? std::min<int>(pcSlice->getRPL1()->getNumRefEntries(), pcSlice->getPPS()->getNumRefIdxL1DefaultActive()) : 0; + bool overrideFlag = ( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) != defaultL0 || ( pcSlice->isInterB() && pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) != defaultL1 ) ); + WRITE_FLAG( overrideFlag ? 1 : 0, "num_ref_idx_active_override_flag" ); + if( overrideFlag ) + { + if(pcSlice->getRPL0()->getNumRefEntries() > 1) + { + WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) - 1, "num_ref_idx_l0_active_minus1" ); + } + else + { + pcSlice->setNumRefIdx( REF_PIC_LIST_0, 1); + } + + if( pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1) + { + WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) - 1, "num_ref_idx_l1_active_minus1" ); + } + else + { + pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0); + } + } + else + { + pcSlice->setNumRefIdx( REF_PIC_LIST_0, defaultL0 ); + pcSlice->setNumRefIdx( REF_PIC_LIST_1, defaultL1 ); + } + } + else + { + pcSlice->setNumRefIdx( REF_PIC_LIST_0, pcSlice->isIntra() ? 
0 : 1 ); + pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0 ); + } } + if( !pcSlice->isIntra() ) { if( !pcSlice->isIntra() && pcSlice->getPPS()->getCabacInitPresentFlag() ) @@ -1280,11 +1944,14 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) } } - if( pcSlice->getEnableTMVPFlag() ) + if( pcSlice->getPicHeader()->getEnableTMVPFlag() ) { if( pcSlice->getSliceType() == B_SLICE ) { - WRITE_FLAG( pcSlice->getColFromL0Flag(), "collocated_from_l0_flag" ); + if (!pcSlice->getPPS()->getPPSCollocatedFromL0Idc()) + { + WRITE_FLAG( pcSlice->getColFromL0Flag(), "collocated_from_l0_flag" ); + } } if( pcSlice->getSliceType() != I_SLICE && @@ -1294,81 +1961,13 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) WRITE_UVLC( pcSlice->getColRefIdx(), "collocated_ref_idx" ); } } + if( ( pcSlice->getPPS()->getUseWP() && pcSlice->getSliceType() == P_SLICE ) || ( pcSlice->getPPS()->getWPBiPred() && pcSlice->getSliceType() == B_SLICE ) ) { xCodePredWeightTable( pcSlice ); } - WRITE_FLAG( pcSlice->getDepQuantEnabledFlag() ? 1 : 0, "dep_quant_enabled_flag" ); -#if HEVC_USE_SIGN_HIDING - if( !pcSlice->getDepQuantEnabledFlag() ) - { - WRITE_FLAG( pcSlice->getSignDataHidingEnabledFlag() ? 1 : 0, "sign_data_hiding_enabled_flag" ); - } - else - { - CHECK( pcSlice->getSignDataHidingEnabledFlag(), "sign data hiding not supported when dependent quantization is enabled" ); - } -#endif - if ( - pcSlice->getSPS()->getSplitConsOverrideEnabledFlag() - ) - { - WRITE_FLAG(pcSlice->getSplitConsOverrideFlag() ? 
1 : 0, "partition_constrainst_override_flag"); - if (pcSlice->getSplitConsOverrideFlag()) - { - WRITE_UVLC(g_aucLog2[pcSlice->getMinQTSize()] - pcSlice->getSPS()->getLog2MinCodingBlockSize(), "log2_diff_min_qt_min_cb"); - WRITE_UVLC(pcSlice->getMaxBTDepth(), "max_bt_depth"); - if (pcSlice->getMaxBTDepth() != 0) - { - CHECK(pcSlice->getMaxBTSize() < pcSlice->getMinQTSize(), "maxBtSize is smaller than minQtSize"); - WRITE_UVLC(g_aucLog2[pcSlice->getMaxBTSize()] - g_aucLog2[pcSlice->getMinQTSize()], "log2_diff_max_bt_min_qt"); - CHECK(pcSlice->getMaxTTSize() < pcSlice->getMinQTSize(), "maxTtSize is smaller than minQtSize"); - WRITE_UVLC(g_aucLog2[pcSlice->getMaxTTSize()] - g_aucLog2[pcSlice->getMinQTSize()], "log2_diff_max_tt_min_qt"); - } - if ( - pcSlice->isIntra() && pcSlice->getSPS()->getUseDualITree() - ) - { - WRITE_UVLC(g_aucLog2[pcSlice->getMinQTSizeIChroma()] - pcSlice->getSPS()->getLog2MinCodingBlockSize(), "log2_diff_min_qt_min_cb_chroma"); - WRITE_UVLC(pcSlice->getMaxBTDepthIChroma(), "max_mtt_hierarchy_depth_chroma"); - if (pcSlice->getMaxBTDepthIChroma() != 0) - { - CHECK(pcSlice->getMaxBTSizeIChroma() < pcSlice->getMinQTSizeIChroma(), "maxBtSizeC is smaller than minQtSizeC"); - WRITE_UVLC(g_aucLog2[pcSlice->getMaxBTSizeIChroma()] - g_aucLog2[pcSlice->getMinQTSizeIChroma()], "log2_diff_max_bt_min_qt_chroma"); - CHECK(pcSlice->getMaxTTSizeIChroma() < pcSlice->getMinQTSizeIChroma(), "maxTtSizeC is smaller than minQtSizeC"); - WRITE_UVLC(g_aucLog2[pcSlice->getMaxTTSizeIChroma()] - g_aucLog2[pcSlice->getMinQTSizeIChroma()], "log2_diff_max_tt_min_qt_chroma"); - } - } - } - } - if (!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) - { - CHECK(pcSlice->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported"); - WRITE_UVLC(MRG_MAX_NUM_CANDS - pcSlice->getMaxNumMergeCand(), "six_minus_max_num_merge_cand"); - } - if( !pcSlice->isIntra() ) - { - if ( pcSlice->getSPS()->getSBTMVPEnabledFlag() && 
!pcSlice->getSPS()->getUseAffine() ) // ATMVP only - { - CHECK( pcSlice->getMaxNumAffineMergeCand() != 1, "Sub-block merge can number should be 1" ); - } - else - if ( !pcSlice->getSPS()->getSBTMVPEnabledFlag() && !pcSlice->getSPS()->getUseAffine() ) // both off - { - CHECK( pcSlice->getMaxNumAffineMergeCand() != 0, "Sub-block merge can number should be 0" ); - } - else - if ( pcSlice->getSPS()->getUseAffine() ) - { - CHECK( pcSlice->getMaxNumAffineMergeCand() > AFFINE_MRG_MAX_NUM_CANDS, "More affine merge candidates signalled than supported" ); - WRITE_UVLC( AFFINE_MRG_MAX_NUM_CANDS - pcSlice->getMaxNumAffineMergeCand(), "five_minus_max_num_affine_merge_cand" ); - } - if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ) - { - WRITE_FLAG( pcSlice->getDisFracMMVD(), "tile_group_fracmmvd_disabled_flag" ); - } - } + int iCode = pcSlice->getSliceQp() - ( pcSlice->getPPS()->getPicInitQPMinus26() + 26 ); WRITE_SVLC( iCode, "slice_qp_delta" ); if (pcSlice->getPPS()->getSliceChromaQpFlag()) @@ -1380,21 +1979,64 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) if (numberValidComponents > COMPONENT_Cr) { WRITE_SVLC( pcSlice->getSliceChromaQpDelta(COMPONENT_Cr), "slice_cr_qp_offset" ); + if (pcSlice->getSPS()->getJointCbCrEnabledFlag()) + { + WRITE_SVLC( pcSlice->getSliceChromaQpDelta(JOINT_CbCr), "slice_joint_cbcr_qp_offset"); + } } CHECK(numberValidComponents < COMPONENT_Cr+1, "Too many valid components"); } - if (pcSlice->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag()) + if (pcSlice->getPPS()->getCuChromaQpOffsetEnabledFlag()) { WRITE_FLAG(pcSlice->getUseChromaQpAdj(), "cu_chroma_qp_offset_enabled_flag"); } + if( pcSlice->getSPS()->getSAOEnabledFlag() && !picHeader->getSaoEnabledPresentFlag() ) + { + WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" ); + if( chromaEnabled ) + { + WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_CHROMA ), "slice_sao_chroma_flag" ); + } + } + + if( 
pcSlice->getSPS()->getALFEnabledFlag() && !picHeader->getAlfEnabledPresentFlag() ) + { + const int alfEnabled = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y); + WRITE_FLAG(alfEnabled, "slice_alf_enabled_flag"); + + if (alfEnabled) + { + WRITE_CODE(pcSlice->getTileGroupNumAps(), 3, "slice_num_alf_aps_ids_luma"); + const std::vector<int>& apsId = pcSlice->getTileGroupApsIdLuma(); + for (int i = 0; i < pcSlice->getTileGroupNumAps(); i++) + { + WRITE_CODE(apsId[i], 3, "slice_alf_aps_id_luma"); + } + + const int alfChromaIdc = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) + pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) * 2 ; + if (chromaEnabled) + { + WRITE_CODE(alfChromaIdc, 2, "slice_alf_chroma_idc"); + } + if (alfChromaIdc) + { + WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 3, "slice_alf_aps_id_chroma"); + } + } + } + if (pcSlice->getPPS()->getDeblockingFilterControlPresentFlag()) { - if (pcSlice->getPPS()->getDeblockingFilterOverrideEnabledFlag() ) + if (pcSlice->getPPS()->getDeblockingFilterOverrideEnabledFlag() && !picHeader->getDeblockingFilterOverridePresentFlag()) { WRITE_FLAG(pcSlice->getDeblockingFilterOverrideFlag(), "deblocking_filter_override_flag"); } + else + { + pcSlice->setDeblockingFilterOverrideFlag(0); + } if (pcSlice->getDeblockingFilterOverrideFlag()) { WRITE_FLAG(pcSlice->getDeblockingFilterDisable(), "slice_deblocking_filter_disabled_flag"); @@ -1404,23 +2046,19 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) WRITE_SVLC (pcSlice->getDeblockingFilterTcOffsetDiv2(), "slice_tc_offset_div2"); } } + else + { + pcSlice->setDeblockingFilterDisable ( picHeader->getDeblockingFilterDisable() ); + pcSlice->setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() ); + pcSlice->setDeblockingFilterTcOffsetDiv2 ( picHeader->getDeblockingFilterTcOffsetDiv2() ); + } } - - bool isSAOEnabled = pcSlice->getSPS()->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (chromaEnabled && 
pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA))); - bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable()); - - if(pcSlice->getPPS()->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled )) - { - WRITE_FLAG(pcSlice->getLFCrossSliceBoundaryFlag()?1:0, "slice_loop_filter_across_slices_enabled_flag"); - } - - if (pcSlice->getSPS()->getUseReshaper()) + else { - codeReshaper(pcSlice->getReshapeInfo(), pcSlice->getSPS(), pcSlice->isIntra()); + pcSlice->setDeblockingFilterDisable ( false ); + pcSlice->setDeblockingFilterBetaOffsetDiv2( 0 ); + pcSlice->setDeblockingFilterTcOffsetDiv2 ( 0 ); } -#if HEVC_DEPENDENT_SLICES - } -#endif if(pcSlice->getPPS()->getSliceHeaderExtensionPresentFlag()) { @@ -1429,7 +2067,6 @@ void HLSWriter::codeSliceHeader ( Slice* pcSlice ) } -#if JVET_M0101_HLS void HLSWriter::codeConstraintInfo ( const ConstraintInfo* cinfo ) { WRITE_FLAG(cinfo->getProgressiveSourceFlag(), "general_progressive_source_flag" ); @@ -1442,25 +2079,39 @@ void HLSWriter::codeConstraintInfo ( const ConstraintInfo* cinfo ) WRITE_CODE(cinfo->getMaxChromaFormatConstraintIdc(), 2, "max_chroma_format_constraint_idc" ); WRITE_FLAG(cinfo->getNoQtbttDualTreeIntraConstraintFlag() ? 1 : 0, "no_qtbtt_dual_tree_intra_constraint_flag"); + WRITE_FLAG(cinfo->getNoPartitionConstraintsOverrideConstraintFlag() ? 1 : 0, "no_partition_constraints_override_constraint_flag"); WRITE_FLAG(cinfo->getNoSaoConstraintFlag() ? 1 : 0, "no_sao_constraint_flag"); WRITE_FLAG(cinfo->getNoAlfConstraintFlag() ? 1 : 0, "no_alf_constraint_flag"); - WRITE_FLAG(cinfo->getNoPcmConstraintFlag() ? 1 : 0, "no_pcm_constraint_flag"); + WRITE_FLAG(cinfo->getNoJointCbCrConstraintFlag() ? 1 : 0, "no_joint_cbcr_constraint_flag"); WRITE_FLAG(cinfo->getNoRefWraparoundConstraintFlag() ? 1 : 0, "no_ref_wraparound_constraint_flag"); WRITE_FLAG(cinfo->getNoTemporalMvpConstraintFlag() ? 1 : 0, "no_temporal_mvp_constraint_flag"); WRITE_FLAG(cinfo->getNoSbtmvpConstraintFlag() ? 
1 : 0, "no_sbtmvp_constraint_flag"); WRITE_FLAG(cinfo->getNoAmvrConstraintFlag() ? 1 : 0, "no_amvr_constraint_flag"); WRITE_FLAG(cinfo->getNoBdofConstraintFlag() ? 1 : 0, "no_bdof_constraint_flag"); + WRITE_FLAG(cinfo->getNoDmvrConstraintFlag() ? 1 : 0, "no_dmvr_constraint_flag"); WRITE_FLAG(cinfo->getNoCclmConstraintFlag() ? 1 : 0, "no_cclm_constraint_flag"); WRITE_FLAG(cinfo->getNoMtsConstraintFlag() ? 1 : 0, "no_mts_constraint_flag"); + WRITE_FLAG(cinfo->getNoSbtConstraintFlag() ? 1 : 0, "no_sbt_constraint_flag"); WRITE_FLAG(cinfo->getNoAffineMotionConstraintFlag() ? 1 : 0, "no_affine_motion_constraint_flag"); - WRITE_FLAG(cinfo->getNoGbiConstraintFlag() ? 1 : 0, "no_gbi_constraint_flag"); - WRITE_FLAG(cinfo->getNoMhIntraConstraintFlag() ? 1 : 0, "no_mh_intra_constraint_flag"); + WRITE_FLAG(cinfo->getNoBcwConstraintFlag() ? 1 : 0, "no_bcw_constraint_flag"); + WRITE_FLAG(cinfo->getNoIbcConstraintFlag() ? 1 : 0, "no_ibc_constraint_flag"); + WRITE_FLAG(cinfo->getNoCiipConstraintFlag() ? 1 : 0, "no_ciip_constraint_flag"); + WRITE_FLAG(cinfo->getNoFPelMmvdConstraintFlag() ? 1 : 0, "no_fpel_mmvd_constraint_flag"); WRITE_FLAG(cinfo->getNoTriangleConstraintFlag() ? 1 : 0, "no_triangle_constraint_flag"); WRITE_FLAG(cinfo->getNoLadfConstraintFlag() ? 1 : 0, "no_ladf_constraint_flag"); - WRITE_FLAG(cinfo->getNoCurrPicRefConstraintFlag() ? 1 : 0, "no_curr_pic_ref_constraint_flag"); + WRITE_FLAG(cinfo->getNoTransformSkipConstraintFlag() ? 1 : 0, "no_transform_skip_constraint_flag"); + WRITE_FLAG(cinfo->getNoBDPCMConstraintFlag() ? 1 : 0, "no_bdpcm_constraint_flag"); WRITE_FLAG(cinfo->getNoQpDeltaConstraintFlag() ? 1 : 0, "no_qp_delta_constraint_flag"); WRITE_FLAG(cinfo->getNoDepQuantConstraintFlag() ? 1 : 0, "no_dep_quant_constraint_flag"); WRITE_FLAG(cinfo->getNoSignDataHidingConstraintFlag() ? 1 : 0, "no_sign_data_hiding_constraint_flag"); + WRITE_FLAG(cinfo->getNoTrailConstraintFlag() ? 1 : 0, "no_trail_constraint_flag"); + WRITE_FLAG(cinfo->getNoStsaConstraintFlag() ? 
1 : 0, "no_stsa_constraint_flag"); + WRITE_FLAG(cinfo->getNoRaslConstraintFlag() ? 1 : 0, "no_rasl_constraint_flag"); + WRITE_FLAG(cinfo->getNoRadlConstraintFlag() ? 1 : 0, "no_radl_constraint_flag"); + WRITE_FLAG(cinfo->getNoIdrConstraintFlag() ? 1 : 0, "no_idr_constraint_flag"); + WRITE_FLAG(cinfo->getNoCraConstraintFlag() ? 1 : 0, "no_cra_constraint_flag"); + WRITE_FLAG(cinfo->getNoGdrConstraintFlag() ? 1 : 0, "no_gdr_constraint_flag"); + WRITE_FLAG(cinfo->getNoApsConstraintFlag() ? 1 : 0, "no_aps_constraint_flag"); } @@ -1469,9 +2120,16 @@ void HLSWriter::codeProfileTierLevel ( const ProfileTierLevel* ptl, int maxN WRITE_CODE( int(ptl->getProfileIdc()), 7 , "general_profile_idc" ); WRITE_FLAG( ptl->getTierFlag()==Level::HIGH, "general_tier_flag" ); - codeConstraintInfo(ptl->getConstraintInfo()); + codeConstraintInfo( ptl->getConstraintInfo() ); + + WRITE_CODE( int( ptl->getLevelIdc() ), 8, "general_level_idc" ); + + WRITE_CODE(ptl->getNumSubProfile(), 8, "num_sub_profiles"); + for (int i = 0; i < ptl->getNumSubProfile(); i++) + { + WRITE_CODE(ptl->getSubProfileIdc(i) , 32, "general_sub_profile_idc[i]"); + } - WRITE_CODE( int(ptl->getLevelIdc()), 8 , "general_level_idc" ); for (int i = 0; i < maxNumSubLayersMinus1; i++) { @@ -1493,92 +2151,7 @@ void HLSWriter::codeProfileTierLevel ( const ProfileTierLevel* ptl, int maxN } -#else -void HLSWriter::codePTL( const PTL* pcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1) -{ - if(profilePresentFlag) - { - codeProfileTier(pcPTL->getGeneralPTL(), false); // general_... 
- } - WRITE_CODE( int(pcPTL->getGeneralPTL()->getLevelIdc()), 8, "general_level_idc" ); - - for (int i = 0; i < maxNumSubLayersMinus1; i++) - { - WRITE_FLAG( pcPTL->getSubLayerProfilePresentFlag(i), "sub_layer_profile_present_flag[i]" ); - WRITE_FLAG( pcPTL->getSubLayerLevelPresentFlag(i), "sub_layer_level_present_flag[i]" ); - } - - if (maxNumSubLayersMinus1 > 0) - { - for (int i = maxNumSubLayersMinus1; i < 8; i++) - { - WRITE_CODE(0, 2, "reserved_zero_2bits"); - } - } - - for(int i = 0; i < maxNumSubLayersMinus1; i++) - { - if( pcPTL->getSubLayerProfilePresentFlag(i) ) - { - codeProfileTier(pcPTL->getSubLayerPTL(i), true); // sub_layer_... - } - if( pcPTL->getSubLayerLevelPresentFlag(i) ) - { - WRITE_CODE( int(pcPTL->getSubLayerPTL(i)->getLevelIdc()), 8, "sub_layer_level_idc[i]" ); - } - } -} - -#if ENABLE_TRACING || RExt__DECODER_DEBUG_BIT_STATISTICS -void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool bIsSubLayer ) -#define PTL_TRACE_TEXT(txt) bIsSubLayer?("sub_layer_" txt) : ("general_" txt) -#else -void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool /*bIsSubLayer*/ ) -#define PTL_TRACE_TEXT(txt) txt -#endif -{ - WRITE_CODE( ptl->getProfileSpace(), 2 , PTL_TRACE_TEXT("profile_space" )); - WRITE_FLAG( ptl->getTierFlag()==Level::HIGH, PTL_TRACE_TEXT("tier_flag" )); - WRITE_CODE( int(ptl->getProfileIdc()), 5 , PTL_TRACE_TEXT("profile_idc" )); - for(int j = 0; j < 32; j++) - { - WRITE_FLAG( ptl->getProfileCompatibilityFlag(j), PTL_TRACE_TEXT("profile_compatibility_flag[][j]" )); - } - - WRITE_FLAG(ptl->getProgressiveSourceFlag(), PTL_TRACE_TEXT("progressive_source_flag" )); - WRITE_FLAG(ptl->getInterlacedSourceFlag(), PTL_TRACE_TEXT("interlaced_source_flag" )); - WRITE_FLAG(ptl->getNonPackedConstraintFlag(), PTL_TRACE_TEXT("non_packed_constraint_flag" )); - WRITE_FLAG(ptl->getFrameOnlyConstraintFlag(), PTL_TRACE_TEXT("frame_only_constraint_flag" )); - - if (ptl->getProfileIdc() == Profile::MAINREXT || ptl->getProfileIdc() 
== Profile::HIGHTHROUGHPUTREXT ) - { - const uint32_t bitDepthConstraint=ptl->getBitDepthConstraint(); - WRITE_FLAG(bitDepthConstraint<=12, PTL_TRACE_TEXT("max_12bit_constraint_flag" )); - WRITE_FLAG(bitDepthConstraint<=10, PTL_TRACE_TEXT("max_10bit_constraint_flag" )); - WRITE_FLAG(bitDepthConstraint<= 8, PTL_TRACE_TEXT("max_8bit_constraint_flag" )); - const ChromaFormat chromaFmtConstraint=ptl->getChromaFormatConstraint(); - WRITE_FLAG(chromaFmtConstraint==CHROMA_422||chromaFmtConstraint==CHROMA_420||chromaFmtConstraint==CHROMA_400, PTL_TRACE_TEXT("max_422chroma_constraint_flag" )); - WRITE_FLAG(chromaFmtConstraint==CHROMA_420||chromaFmtConstraint==CHROMA_400, PTL_TRACE_TEXT("max_420chroma_constraint_flag" )); - WRITE_FLAG(chromaFmtConstraint==CHROMA_400, PTL_TRACE_TEXT("max_monochrome_constraint_flag")); - WRITE_FLAG(ptl->getIntraConstraintFlag(), PTL_TRACE_TEXT("intra_constraint_flag" )); - WRITE_FLAG(ptl->getOnePictureOnlyConstraintFlag(), PTL_TRACE_TEXT("one_picture_only_constraint_flag")); - WRITE_FLAG(ptl->getLowerBitRateConstraintFlag(), PTL_TRACE_TEXT("lower_bit_rate_constraint_flag" )); - WRITE_CODE(0 , 16, PTL_TRACE_TEXT("reserved_zero_34bits[0..15]" )); - WRITE_CODE(0 , 16, PTL_TRACE_TEXT("reserved_zero_34bits[16..31]" )); - WRITE_CODE(0 , 2, PTL_TRACE_TEXT("reserved_zero_34bits[32..33]" )); - } - else - { - WRITE_CODE(0x0000 , 16, PTL_TRACE_TEXT("reserved_zero_43bits[0..15]" )); - WRITE_CODE(0x0000 , 16, PTL_TRACE_TEXT("reserved_zero_43bits[16..31]" )); - WRITE_CODE(0x000 , 11, PTL_TRACE_TEXT("reserved_zero_43bits[32..42]" )); - } - WRITE_FLAG(false, PTL_TRACE_TEXT("reserved_zero_bit" )); -#undef PTL_TRACE_TEXT -} -#endif -#if HEVC_TILES_WPP /** * Write tiles and wavefront substreams sizes for the slice header (entry points). 
* @@ -1586,7 +2159,8 @@ void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool /*bIsSu */ void HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice ) { - if (!pSlice->getPPS()->getTilesEnabledFlag() && !pSlice->getPPS()->getEntropyCodingSyncEnabledFlag()) + pSlice->setNumEntryPoints( pSlice->getPPS() ); + if( pSlice->getNumEntryPoints() == 0 ) { return; } @@ -1605,10 +2179,9 @@ void HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice ) while (maxOffset >= (1u << (offsetLenMinus1 + 1))) { offsetLenMinus1++; - CHECK(offsetLenMinus1 + 1 >= 32, "Invalid offset lenght minus 1"); + CHECK(offsetLenMinus1 + 1 >= 32, "Invalid offset length minus 1"); } - WRITE_UVLC(pSlice->getNumberOfSubstreamSizes(), "num_entry_point_offsets"); if (pSlice->getNumberOfSubstreamSizes()>0) { WRITE_UVLC(offsetLenMinus1, "offset_len_minus1"); @@ -1619,7 +2192,6 @@ void HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice ) } } } -#endif // ==================================================================================================================== @@ -1708,37 +2280,29 @@ void HLSWriter::xCodePredWeightTable( Slice* pcSlice ) } } -#if HEVC_USE_SCALING_LISTS /** code quantization matrix * \param scalingList quantization matrix information */ void HLSWriter::codeScalingList( const ScalingList &scalingList ) { //for each size - for(uint32_t sizeId = SCALING_LIST_FIRST_CODED; sizeId <= SCALING_LIST_LAST_CODED; sizeId++) + WRITE_FLAG(scalingList.getDisableScalingMatrixForLfnstBlks(), "scaling_matrix_for_lfnst_disabled_flag"); + for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) { - const int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries. 
- - for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId+=predListStep) + bool scalingListCopyModeFlag = scalingList.getScalingListCopyModeFlag(scalingListId); + WRITE_FLAG(scalingListCopyModeFlag, "scaling_list_copy_mode_flag"); //copy mode + if (!scalingListCopyModeFlag)// Copy Mode { - bool scalingListPredModeFlag = scalingList.getScalingListPredModeFlag(sizeId, listId); - WRITE_FLAG( scalingListPredModeFlag, "scaling_list_pred_mode_flag" ); - if(!scalingListPredModeFlag)// Copy Mode - { - if (sizeId == SCALING_LIST_32x32) - { - // adjust the code, to cope with the missing chroma entries - WRITE_UVLC( ((int)listId - (int)scalingList.getRefMatrixId (sizeId,listId)) / (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES), "scaling_list_pred_matrix_id_delta"); - } - else - { - WRITE_UVLC( (int)listId - (int)scalingList.getRefMatrixId (sizeId,listId), "scaling_list_pred_matrix_id_delta"); - } - } - else// DPCM Mode - { - xCodeScalingList(&scalingList, sizeId, listId); - } + WRITE_FLAG(scalingList.getScalingListPreditorModeFlag(scalingListId), "scaling_list_predictor_mode_flag"); + } + if ((scalingListCopyModeFlag || scalingList.getScalingListPreditorModeFlag(scalingListId)) && scalingListId!= SCALING_LIST_1D_START_2x2 && scalingListId != SCALING_LIST_1D_START_4x4 && scalingListId != SCALING_LIST_1D_START_8x8) + { + WRITE_UVLC((int)scalingListId - (int)scalingList.getRefMatrixId(scalingListId), "scaling_list_pred_matrix_id_delta"); + } + if (!scalingListCopyModeFlag) + { + //DPCM + xCodeScalingList(&scalingList, scalingListId, scalingList.getScalingListPreditorModeFlag(scalingListId)); } } return; @@ -1748,35 +2312,55 @@ void HLSWriter::codeScalingList( const ScalingList &scalingList ) * \param sizeId size index * \param listId list index */ -void HLSWriter::xCodeScalingList(const ScalingList* scalingList, uint32_t sizeId, uint32_t listId) +void HLSWriter::xCodeScalingList(const ScalingList* scalingList, uint32_t scalingListId, bool isPredictor) { - int coefNum = 
std::min( MAX_MATRIX_COEF_NUM, ( int ) g_scalingListSize[sizeId] ); - uint32_t* scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )]; - int nextCoef = SCALING_LIST_START_VALUE; + int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : ((scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8); + int coefNum = matrixSize * matrixSize; + ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)]; + int nextCoef = (isPredictor) ? 0 : SCALING_LIST_START_VALUE; + int data; - const int *src = scalingList->getScalingListAddress(sizeId, listId); - if( sizeId > SCALING_LIST_8x8 ) + const int *src = scalingList->getScalingListAddress(scalingListId); + int PredListId = scalingList->getRefMatrixId(scalingListId); + const int *srcPred = (isPredictor) ? ((scalingListId==PredListId) ? scalingList->getScalingListDefaultAddress(scalingListId) : scalingList->getScalingListAddress(PredListId)) : NULL; + int deltasrc[65] = { 0 }; + + if (isPredictor) { - WRITE_SVLC( scalingList->getScalingListDC(sizeId,listId) - 8, "scaling_list_dc_coef_minus8"); - nextCoef = scalingList->getScalingListDC(sizeId,listId); + if (scalingListId >= SCALING_LIST_1D_START_16x16) + { + deltasrc[64] = scalingList->getScalingListDC(scalingListId) - ((PredListId >= SCALING_LIST_1D_START_16x16) ? ((scalingListId == PredListId) ? 
16 : scalingList->getScalingListDC(PredListId)) : srcPred[scan[0].idx]); + } + for (int i = 0; i < coefNum; i++) + { + deltasrc[i] = (src[scan[i].idx] - srcPred[scan[i].idx]); + } } - for(int i=0;i<coefNum;i++) + if (scalingListId >= SCALING_LIST_1D_START_16x16) { - data = src[scan[i]] - nextCoef; - nextCoef = src[scan[i]]; - if(data > 127) + if (isPredictor) { - data = data - 256; + data = deltasrc[64]; + nextCoef = deltasrc[64]; } - if(data < -128) + else { - data = data + 256; + data = scalingList->getScalingListDC(scalingListId) - nextCoef; + nextCoef = scalingList->getScalingListDC(scalingListId); } - - WRITE_SVLC( data, "scaling_list_delta_coef"); + data = ((data + 128) & 255) - 128; + WRITE_SVLC((int8_t)data, "scaling_list_dc_coef"); + } + for(int i=0;i<coefNum;i++) + { + if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4) + continue; + data = (isPredictor) ? (deltasrc[i] - nextCoef) : (src[scan[i].idx] - nextCoef); + nextCoef = (isPredictor) ? deltasrc[i] : src[scan[i].idx]; + data = ((data + 128) & 255) - 128; + WRITE_SVLC((int8_t)data, "scaling_list_delta_coef"); } } -#endif bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrpPOC, bool usedFlag) { @@ -1793,174 +2377,62 @@ bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrp return false; } - -void HLSWriter::alfGolombEncode( int coeff, int k ) +void HLSWriter::alfGolombEncode( int coeff, int k, const bool signed_coeff ) { - int symbol = abs( coeff ); - - int m = (int)pow( 2.0, k ); - int q = symbol / m; - - for( int i = 0; i < q; i++ ) + unsigned int symbol = abs( coeff ); + while ( symbol >= (unsigned int)( 1 << k ) ) { - xWriteFlag( 1 ); + symbol -= 1 << k; + k++; + WRITE_FLAG( 0, "alf_coeff_abs_prefix" ); } - xWriteFlag( 0 ); - // write one zero + WRITE_FLAG( 1, "alf_coeff_abs_prefix" ); - for( int i = 0; i < k; i++ ) + if ( k > 0 ) { - xWriteFlag( symbol & 0x01 ); - symbol >>= 1; + WRITE_CODE( symbol, k, 
"alf_coeff_abs_suffix" ); } - - if( coeff != 0 ) + if ( signed_coeff && coeff != 0 ) { - int sign = ( coeff > 0 ) ? 1 : 0; - xWriteFlag( sign ); + WRITE_FLAG( (coeff < 0) ? 1 : 0, "alf_coeff_sign" ); } } -void HLSWriter::alfFilter( const AlfSliceParam& alfSliceParam, const bool isChroma ) +void HLSWriter::alfFilter( const AlfParam& alfParam, const bool isChroma, const int altIdx ) { - if( !isChroma ) - { - WRITE_FLAG( alfSliceParam.alfLumaCoeffDeltaFlag, "alf_luma_coeff_delta_flag" ); - if( !alfSliceParam.alfLumaCoeffDeltaFlag ) - { - if( alfSliceParam.numLumaFilters > 1 ) - { - WRITE_FLAG( alfSliceParam.alfLumaCoeffDeltaPredictionFlag, "alf_luma_coeff_delta_prediction_flag" ); - } - } - } - - static int bitsCoeffScan[EncAdaptiveLoopFilter::m_MAX_SCAN_VAL][EncAdaptiveLoopFilter::m_MAX_EXP_GOLOMB]; - memset( bitsCoeffScan, 0, sizeof( bitsCoeffScan ) ); - AlfFilterShape alfShape( isChroma ? 5 : 7 ); - const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType ); - const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff; - const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters; + AlfFilterShape alfShape(isChroma ? 5 : 7); + const short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff; + const short* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp; + const int numFilters = isChroma ? 
1 : alfParam.numLumaFilters; // vlc for all - for( int ind = 0; ind < numFilters; ++ind ) - { - if( isChroma || !alfSliceParam.alfLumaCoeffDeltaFlag || alfSliceParam.alfLumaCoeffFlag[ind] ) - { - for( int i = 0; i < alfShape.numCoeff - 1; i++ ) - { - int coeffVal = abs( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] ); - - for( int k = 1; k < 15; k++ ) - { - bitsCoeffScan[alfShape.golombIdx[i]][k] += EncAdaptiveLoopFilter::lengthGolomb( coeffVal, k ); - } - } - } - } - - static int kMinTab[MAX_NUM_ALF_COEFF]; - int kMin = EncAdaptiveLoopFilter::getGolombKMin( alfShape, numFilters, kMinTab, bitsCoeffScan ); - - // Golomb parameters - WRITE_UVLC( kMin - 1, isChroma ? "alf_chroma_min_eg_order_minus1" : "alf_luma_min_eg_order_minus1" ); - - for( int idx = 0; idx < maxGolombIdx; idx++ ) - { - bool golombOrderIncreaseFlag = ( kMinTab[idx] != kMin ) ? true : false; - CHECK( !( kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" ); - WRITE_FLAG( golombOrderIncreaseFlag, isChroma ? "alf_chroma_eg_order_increase_flag" : "alf_luma_eg_order_increase_flag" ); - kMin = kMinTab[idx]; - } - - if( !isChroma ) - { - if( alfSliceParam.alfLumaCoeffDeltaFlag ) - { - for( int ind = 0; ind < numFilters; ++ind ) - { - WRITE_FLAG( alfSliceParam.alfLumaCoeffFlag[ind], "alf_luma_coeff_flag[i]" ); - } - } - } // Filter coefficients for( int ind = 0; ind < numFilters; ++ind ) { - if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag ) - { - continue; - } for( int i = 0; i < alfShape.numCoeff - 1; i++ ) { - alfGolombEncode( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]] ); // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j] + alfGolombEncode( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], 3 ); // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j] } } -} -void HLSWriter::xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol ) -{ - int uiThresh; - if( uiMaxSymbol > 256 ) + // Clipping values coding +#if 
JVET_Q0249_ALF_CHROMA_CLIPFLAG + if( alfParam.nonLinearFlag[isChroma] ) +#else + if( alfParam.nonLinearFlag[isChroma][altIdx] ) +#endif { - int uiThreshVal = 1 << 8; - uiThresh = 8; - while( uiThreshVal <= uiMaxSymbol ) + for (int ind = 0; ind < numFilters; ++ind) { - uiThresh++; - uiThreshVal <<= 1; + for (int i = 0; i < alfShape.numCoeff - 1; i++) + { + WRITE_CODE(clipp[ind* MAX_NUM_ALF_LUMA_COEFF + i], 2, "alf_clipping_index"); + } } - uiThresh--; - } - else - { - uiThresh = g_tbMax[uiMaxSymbol]; - } - - int uiVal = 1 << uiThresh; - assert( uiVal <= uiMaxSymbol ); - assert( ( uiVal << 1 ) > uiMaxSymbol ); - assert( uiSymbol < uiMaxSymbol ); - int b = uiMaxSymbol - uiVal; - assert( b < uiVal ); - if( uiSymbol < uiVal - b ) - { - xWriteCode( uiSymbol, uiThresh ); - } - else - { - uiSymbol += uiVal - b; - assert( uiSymbol < ( uiVal << 1 ) ); - assert( ( uiSymbol >> 1 ) >= uiVal - b ); - xWriteCode( uiSymbol, uiThresh + 1 ); } } -void HLSWriter::truncatedUnaryEqProb( int symbol, const int maxSymbol ) -{ - if( maxSymbol == 0 ) - { - return; - } - - bool codeLast = ( maxSymbol > symbol ); - int bins = 0; - int numBins = 0; - - while( symbol-- ) - { - bins <<= 1; - bins++; - numBins++; - } - if( codeLast ) - { - bins <<= 1; - numBins++; - } - CHECK( !( numBins <= 32 ), "Unspecified error" ); - xWriteCode( bins, numBins ); -} //! \} diff --git a/source/Lib/EncoderLib/VLCWriter.h b/source/Lib/EncoderLib/VLCWriter.h index 2ec729bde447451bb29916fcabeaaa337a05bb17..7816710363be97eb40bad3bca4f212ca57290756 100644 --- a/source/Lib/EncoderLib/VLCWriter.h +++ b/source/Lib/EncoderLib/VLCWriter.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * -* Copyright (c) 2010-2019, ITU/ISO/IEC +* Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -49,6 +49,7 @@ #if ENABLE_TRACING +#define WRITE_SCODE( value, length, name) xWriteSCodeTr ( value, length, name ) #define WRITE_CODE( value, length, name) xWriteCodeTr ( value, length, name ) #define WRITE_UVLC( value, name) xWriteUvlcTr ( value, name ) #define WRITE_SVLC( value, name) xWriteSvlcTr ( value, name ) @@ -56,7 +57,7 @@ extern bool g_HLSTraceEnable; #else - +#define WRITE_SCODE( value, length, name) xWriteSCode ( value, length ) #define WRITE_CODE( value, length, name) xWriteCode ( value, length ) #define WRITE_UVLC( value, name) xWriteUvlc ( value ) #define WRITE_SVLC( value, name) xWriteSvlc ( value ) @@ -76,21 +77,20 @@ protected: virtual ~VLCWriter() {} void setBitstream ( OutputBitstream* p ) { m_pcBitIf = p; } - + void xWriteSCode ( int code, uint32_t length ); void xWriteCode ( uint32_t uiCode, uint32_t uiLength ); void xWriteUvlc ( uint32_t uiCode ); void xWriteSvlc ( int iCode ); void xWriteFlag ( uint32_t uiCode ); #if ENABLE_TRACING + void xWriteSCodeTr ( int value, uint32_t length, const char *pSymbolName); void xWriteCodeTr ( uint32_t value, uint32_t length, const char *pSymbolName); void xWriteUvlcTr ( uint32_t value, const char *pSymbolName); void xWriteSvlcTr ( int value, const char *pSymbolName); void xWriteFlagTr ( uint32_t value, const char *pSymbolName); #endif void xWriteRbspTrailingBits(); -#if JVET_M0101_HLS bool isByteAligned() { return (m_pcBitIf->getNumBitsUntilByteAligned() == 0); } ; -#endif }; @@ -113,46 +113,35 @@ public: virtual ~HLSWriter() {} private: - void xCodeShortTermRefPicSet ( const ReferencePictureSet* pcRPS, bool calledFromSliceHeader, int idx ); + void xCodeRefPicList( const ReferencePictureList* rpl, bool isLongTermPresent, uint32_t ltLsbBitsCount, const bool isForbiddenZeroDeltaPoc ); bool xFindMatchingLTRP ( Slice* pcSlice, uint32_t *ltrpsIndex, int ltrpPOC, bool usedFlag ); void xCodePredWeightTable ( Slice* pcSlice ); -#if 
HEVC_USE_SCALING_LISTS - void xCodeScalingList ( const ScalingList* scalingList, uint32_t sizeId, uint32_t listId); -#endif + void xCodeScalingList ( const ScalingList* scalingList, uint32_t scalinListId, bool isPredictor); public: void setBitstream ( OutputBitstream* p ) { m_pcBitIf = p; } uint32_t getNumberOfWrittenBits () { return m_pcBitIf->getNumberOfWrittenBits(); } void codeVUI ( const VUI *pcVUI, const SPS* pcSPS ); void codeSPS ( const SPS* pcSPS ); - void codePPS ( const PPS* pcPPS ); - void codeAPS ( APS* pcAPS); -#if HEVC_VPS + void codePPS ( const PPS* pcPPS, const SPS* pcSPS ); + void codeAPS ( APS* pcAPS ); + void codeAlfAps ( APS* pcAPS ); + void codeLmcsAps ( APS* pcAPS ); + void codeScalingListAps ( APS* pcAPS ); void codeVPS ( const VPS* pcVPS ); -#endif + void codeDPS ( const DPS* dps ); + void codePictureHeader ( PicHeader* picHeader ); void codeSliceHeader ( Slice* pcSlice ); -#if !JVET_M0101_HLS - void codePTL ( const PTL* pcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1); - void codeProfileTier ( const ProfileTierLevel* ptl, const bool bIsSubLayer ); -#else void codeConstraintInfo ( const ConstraintInfo* cinfo ); void codeProfileTierLevel ( const ProfileTierLevel* ptl, int maxNumSubLayersMinus1 ); -#endif - void codeHrdParameters ( const HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1 ); -#if HEVC_TILES_WPP + void codeHrdParameters ( const HRDParameters *hrd, const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1); + void codeTilesWPPEntryPoint ( Slice* pSlice ); -#endif -#if HEVC_USE_SCALING_LISTS void codeScalingList ( const ScalingList &scalingList ); -#endif - void alfFilter( const AlfSliceParam& alfSliceParam, const bool isChroma ); + void alfFilter( const AlfParam& alfParam, const bool isChroma, const int altIdx ); private: - void xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol ); - void alfGolombEncode( const int coeff, const int k ); - void truncatedUnaryEqProb( int symbol, int 
maxSymbol ); - - void codeReshaper ( const SliceReshapeInfo& pSliceReshaperInfo, const SPS* pcSPS, const bool isIntra); + void alfGolombEncode( const int coeff, const int k, const bool signed_coeff=true ); }; //! \} diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.cpp b/source/Lib/EncoderLib/WeightPredAnalysis.cpp index d6e28387a28e4388ec1bf4b5d05f636b0114e2a3..5117c79d98d9a4e5cc9b05c7c79e576a55bc439f 100644 --- a/source/Lib/EncoderLib/WeightPredAnalysis.cpp +++ b/source/Lib/EncoderLib/WeightPredAnalysis.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.h b/source/Lib/EncoderLib/WeightPredAnalysis.h index ca3a1e95e909cbf6bc5509fbdd0a4a8f4410239c..76c91be5f30d5966dfef874927d97c1ce8f50a9c 100644 --- a/source/Lib/EncoderLib/WeightPredAnalysis.h +++ b/source/Lib/EncoderLib/WeightPredAnalysis.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/Utilities/ColourRemapping.cpp b/source/Lib/Utilities/ColourRemapping.cpp index 480f7ab2b261e4d8ce198bcf92a1c08599fc32c1..0466dc6caa282580ce7393d3a66a81a4b4a465f8 100644 --- a/source/Lib/Utilities/ColourRemapping.cpp +++ b/source/Lib/Utilities/ColourRemapping.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -40,6 +40,8 @@ #include <stdio.h> #include <fcntl.h> +#if HEVC_SEI + #include "ColourRemapping.h" #include "DecoderLib/AnnexBread.h" #include "DecoderLib/NALread.h" @@ -413,3 +415,5 @@ void applyColourRemapping(const PelUnitBuf& pic, SEIColourRemappingInfo& criSEI, } //! \} +#endif + diff --git a/source/Lib/Utilities/ColourRemapping.h b/source/Lib/Utilities/ColourRemapping.h index f090191448477fa9835cc619c4c74bd6d69f0753..41901ebeba8d6fd43ff21f080803b9ab44c2043d 100644 --- a/source/Lib/Utilities/ColourRemapping.h +++ b/source/Lib/Utilities/ColourRemapping.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,6 +42,8 @@ #pragma once #endif // _MSC_VER > 1000 +#if HEVC_SEI + #include "CommonLib/Picture.h" #include "CommonLib/SEI.h" #include <fstream> @@ -71,3 +73,4 @@ public: #endif +#endif diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp index e6808bf71094ee8fca22b48db7e7fb104b6a80f1..720d06212579459767a676b6174502cee7cb3caa 100644 --- a/source/Lib/Utilities/VideoIOYuv.cpp +++ b/source/Lib/Utilities/VideoIOYuv.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -453,7 +453,7 @@ static bool verifyPlane(Pel* dst, * @param fileBitDepth component bit depth in file * @return true for success, false in case of error */ -static bool writePlane(ostream& fd, const Pel* src, +static bool writePlane( uint32_t orgWidth, uint32_t orgHeight, ostream& fd, const Pel* src, const bool is16bit, const uint32_t stride_src, uint32_t width444, uint32_t height444, @@ -471,7 +471,10 @@ static bool writePlane(ostream& fd, const Pel* src, const uint32_t width_file = width444 >> csx_file; const uint32_t height_file = height444 >> csy_file; const bool writePYUV = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0); - const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 2 : 1)) >> csx_file; + + CHECK( writePYUV, "Not supported" ); + CHECK( csx_file != csx_src, "Not supported" ); + const uint32_t stride_file = writePYUV ? ( orgWidth * fileBitDepth ) >> ( csx_file + 3 ) : ( orgWidth * ( is16bit ? 
2 : 1 ) ) >> csx_file; std::vector<uint8_t> bufVec(stride_file); uint8_t *buf=&(bufVec[0]); @@ -669,6 +672,41 @@ static bool writePlane(ostream& fd, const Pel* src, pSrcBuf += srcbuf_stride; } } + + // here height444 and orgHeight are luma heights + for( uint32_t y444 = height444; y444 < orgHeight; y444++ ) + { + if( ( y444 & mask_y_file ) == 0 ) // if this is chroma, determine whether to skip every other row + { + + if( !is16bit ) + { + for( uint32_t x = 0; x < ( orgWidth >> csx_file ); x++ ) + { + buf[x] = 0; + } + } + else + { + for( uint32_t x = 0; x < ( orgWidth >> csx_file ); x++ ) + { + buf[2 * x] = 0; + buf[2 * x + 1] = 0; + } + } + fd.write( reinterpret_cast<const char*>( buf ), stride_file ); + if( fd.eof() || fd.fail() ) + { + return false; + } + } + + if( ( y444 & mask_y_src ) == 0 ) + { + pSrcBuf += srcbuf_stride; + } + } + } return true; } @@ -903,6 +941,9 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp #else ColourSpaceConvert( picOrg, pic, ipcsc, true); #endif + + picOrg.copyFrom(pic); + return true; } @@ -919,7 +960,8 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp * @param format chroma format * @return true for success, false in case of error */ -bool VideoIOYuv::write( const CPelUnitBuf& pic, + // here orgWidth and orgHeight are for luma +bool VideoIOYuv::write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf& pic, const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 ) @@ -993,7 +1035,7 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic, const uint32_t csy = ::getComponentScaleY(compID, format); const CPelBuf area = picO.get(compID); const int planeOffset = (confLeft >> csx) + (confTop >> csy) * area.stride; - if (!writePlane (m_cHandle, area.bufAt (0, 0) + planeOffset, is16bit, area.stride, + if( !writePlane( orgWidth, orgHeight, m_cHandle, 
area.bufAt( 0, 0 ) + planeOffset, is16bit, area.stride, width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch], bPackedYUVOutputMode ? 1 : 0)) { @@ -1086,9 +1128,9 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, const uint32_t width444 = areaTopY.width - (confLeft + confRight); const uint32_t height444 = areaTopY.height - (confTop + confBottom); - CHECK(areaTop.width == areaBottom.width , "Incompatible formats"); - CHECK(areaTop.height == areaBottom.height, "Incompatible formats"); - CHECK(areaTop.stride == areaBottom.stride, "Incompatible formats"); + CHECK(areaTop.width != areaBottom.width , "Incompatible formats"); + CHECK(areaTop.height != areaBottom.height, "Incompatible formats"); + CHECK(areaTop.stride != areaBottom.stride, "Incompatible formats"); if ((width444 == 0) || (height444 == 0)) { @@ -1177,4 +1219,86 @@ void VideoIOYuv::ColourSpaceConvert(const CPelUnitBuf &src, PelUnitBuf &dest, co } } +bool VideoIOYuv::writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPelUnitBuf& pic, const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int outputChoice, ChromaFormat format, const bool bClipToRec709 ) +{ + ChromaFormat chromaFormatIDC = sps.getChromaFormatIdc(); + bool ret = false; + + static Window confFullResolution; + static Window afterScaleWindowFullResolution; + + // decoder does not have information about upscaled picture scaling and conformance windows, store this information when full resolution picture is encountered + if( sps.getMaxPicWidthInLumaSamples() == pps.getPicWidthInLumaSamples() && sps.getMaxPicHeightInLumaSamples() == pps.getPicHeightInLumaSamples() ) + { + afterScaleWindowFullResolution = pps.getScalingWindow(); + afterScaleWindowFullResolution = pps.getConformanceWindow(); + } + + if( outputChoice && ( sps.getMaxPicWidthInLumaSamples() != pic.get( COMPONENT_Y ).width || sps.getMaxPicHeightInLumaSamples() != pic.get( COMPONENT_Y ).height ) ) + { + if( 
outputChoice == 2 ) + { + PelStorage upscaledPic; + upscaledPic.create( chromaFormatIDC, Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) ); + +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int curPicWidth = sps.getMaxPicWidthInLumaSamples() - SPS::getWinUnitX( sps.getChromaFormatIdc() ) * ( afterScaleWindowFullResolution.getWindowLeftOffset() + afterScaleWindowFullResolution.getWindowRightOffset() ); + int curPicHeight = sps.getMaxPicHeightInLumaSamples() - SPS::getWinUnitY( sps.getChromaFormatIdc() ) * ( afterScaleWindowFullResolution.getWindowTopOffset() + afterScaleWindowFullResolution.getWindowBottomOffset() ); +#else + int curPicWidth = sps.getMaxPicWidthInLumaSamples() - afterScaleWindowFullResolution.getWindowLeftOffset() - afterScaleWindowFullResolution.getWindowRightOffset(); + int curPicHeight = sps.getMaxPicHeightInLumaSamples() - afterScaleWindowFullResolution.getWindowTopOffset() - afterScaleWindowFullResolution.getWindowBottomOffset(); +#endif + + const Window& beforeScalingWindow = pps.getScalingWindow(); +#if JVET_Q0487_SCALING_WINDOW_ISSUES + int refPicWidth = pps.getPicWidthInLumaSamples() - SPS::getWinUnitX( sps.getChromaFormatIdc() ) * ( beforeScalingWindow.getWindowLeftOffset() + beforeScalingWindow.getWindowRightOffset() ); + int refPicHeight = pps.getPicHeightInLumaSamples() - SPS::getWinUnitY( sps.getChromaFormatIdc() ) * ( beforeScalingWindow.getWindowTopOffset() + beforeScalingWindow.getWindowBottomOffset() ); +#else + int refPicWidth = pps.getPicWidthInLumaSamples() - beforeScalingWindow.getWindowLeftOffset() - beforeScalingWindow.getWindowRightOffset(); + int refPicHeight = pps.getPicHeightInLumaSamples() - beforeScalingWindow.getWindowTopOffset() - beforeScalingWindow.getWindowBottomOffset(); +#endif + + int xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth; + int yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight; + + 
Picture::rescalePicture( std::pair<int, int>( xScale, yScale ), pic, pps.getScalingWindow(), upscaledPic, afterScaleWindowFullResolution, chromaFormatIDC, sps.getBitDepths(), false, false, sps.getHorCollocatedChromaFlag(), sps.getVerCollocatedChromaFlag() ); + ret = write( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples(), upscaledPic, + ipCSC, + bPackedYUVOutputMode, + confFullResolution.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), + confFullResolution.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ), + confFullResolution.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + confFullResolution.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ), + NUM_CHROMA_FORMAT, bClipToRec709 ); + } + else + { + const Window &conf = pps.getConformanceWindow(); + + ret = write( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples(), pic, + ipCSC, + bPackedYUVOutputMode, + conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ), + NUM_CHROMA_FORMAT, bClipToRec709 ); + } + } + else + { + const Window &conf = pps.getConformanceWindow(); + + ret = write( pic.get( COMPONENT_Y ).width, pic.get( COMPONENT_Y ).height, pic, + ipCSC, + bPackedYUVOutputMode, + conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ), + conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), + conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ), + NUM_CHROMA_FORMAT, bClipToRec709 ); + } + + return ret; +} diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h index 150ecced5da91190f2a3d8b2b5dfa4ab45d84f86..27504973ee54b0f2248b64a08d54927e89ce78e8 100644 --- 
a/source/Lib/Utilities/VideoIOYuv.h +++ b/source/Lib/Utilities/VideoIOYuv.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,9 @@ using namespace std; // Class definition // ==================================================================================================================== +#include "CommonLib/Slice.h" +#include "CommonLib/Picture.h" + /// YUV file I/O class class VideoIOYuv { @@ -77,7 +80,7 @@ public: bool read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSpaceConversion ipcsc, int aiPad[2], ChromaFormat fileFormat=NUM_CHROMA_FORMAT, const bool bClipToRec709=false ); ///< read one frame with padding parameter // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuv - bool write( const CPelUnitBuf& pic, + bool write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf& pic, const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter @@ -92,7 +95,10 @@ public: bool isEof (); ///< check for end-of-file bool isFail(); ///< check for failure + bool isOpen() { return m_cHandle.is_open(); } + bool writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPelUnitBuf& pic, + const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int outputChoice = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one upscaled YUV frame }; diff --git a/source/Lib/Utilities/program_options_lite.cpp b/source/Lib/Utilities/program_options_lite.cpp index 
0c4bba0502cc08c2caa01b4cd61f66554dfe30ab..859c59117d4d8cbb09ff21397a9b00e514bad74f 100644 --- a/source/Lib/Utilities/program_options_lite.cpp +++ b/source/Lib/Utilities/program_options_lite.cpp @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -96,8 +96,22 @@ namespace df } else { +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + if (opt_name.size() > 0 && opt_name.back() == '*') + { + string prefix_name = opt_name.substr(0, opt_name.size() - 1); + names->opt_prefix.push_back(prefix_name); + opt_prefix_map[prefix_name].push_back(names); + } + else + { + names->opt_long.push_back(opt_name); + opt_long_map[opt_name].push_back(names); + } +#else names->opt_long.push_back(opt_name); opt_long_map[opt_name].push_back(names); +#endif } opt_start += opt_end + 1; } @@ -150,6 +164,12 @@ namespace df { out << "--" << entry.opt_long.front(); } +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + else if (!entry.opt_prefix.empty()) + { + out << "--" << entry.opt_prefix.front() << "*"; + } +#endif } /* format the help text */ @@ -271,6 +291,9 @@ namespace df bool OptionWriter::storePair(bool allow_long, bool allow_short, const string& name, const string& value) { bool found = false; +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + std::string val = value; +#endif Options::NamesMap::iterator opt_it; if (allow_long) { @@ -290,15 +313,34 @@ namespace df found = true; } } - +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + bool allow_prefix = allow_long; + if (allow_prefix && !found) + { + for (opt_it = opts.opt_prefix_map.begin(); opt_it != opts.opt_prefix_map.end(); opt_it++) + { + std::string name_prefix = name.substr(0, opt_it->first.size()); + if (name_prefix == opt_it->first) + { + // prepend value matching * + val = name.substr(name_prefix.size()) + std::string(" 
") + val; + found = true; + break; + } + } + } +#endif if (!found) { error_reporter.error(where()) << "Unknown option `" << name << "' (value:`" << value << "')\n"; return false; } - +#if JVET_O0549_ENCODER_ONLY_FILTER_POL + setOptions((*opt_it).second, val, error_reporter); +#else setOptions((*opt_it).second, value, error_reporter); +#endif return true; } diff --git a/source/Lib/Utilities/program_options_lite.h b/source/Lib/Utilities/program_options_lite.h index 2ce2bd26ed80c6066ec93401034513b2b4b71b4a..6fc3dd33789460bd2dd28733cf73851f333de439 100644 --- a/source/Lib/Utilities/program_options_lite.h +++ b/source/Lib/Utilities/program_options_lite.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +36,8 @@ #include <list> #include <map> +#define JVET_O0549_ENCODER_ONLY_FILTER_POL 1 // JVET-O0549: Encoder-only GOP-based temporal filter. Program Options Lite related changes. 
+ #ifndef __PROGRAM_OPTIONS_LITE__ #define __PROGRAM_OPTIONS_LITE__ @@ -196,6 +198,7 @@ namespace df } std::list<std::string> opt_long; std::list<std::string> opt_short; + std::list<std::string> opt_prefix; OptionBase* opt; }; @@ -207,6 +210,7 @@ namespace df typedef std::map<std::string, NamesPtrList> NamesMap; NamesMap opt_long_map; NamesMap opt_short_map; + NamesMap opt_prefix_map; }; /* Class with templated overloaded operator(), for use by Options::addOptions() */ @@ -228,7 +232,61 @@ namespace df parent.addOption(new Option<T>(name, storage, default_val, desc)); return *this; } + template<typename T> + OptionSpecific& + operator()(const std::string& name, T* storage, T default_val, unsigned uiMaxNum, const std::string& desc = "") + { + std::string cNameBuffer; + std::string cDescriptionBuffer; + + for (unsigned int uiK = 0; uiK < uiMaxNum; uiK++) + { + // it needs to be reset when extra digit is added, e.g. number 10 and above + cNameBuffer.resize(name.size() + 10); + cDescriptionBuffer.resize(desc.size() + 10); + + // isn't there are sprintf function for string?? + sprintf((char*)cNameBuffer.c_str(), name.c_str(), uiK, uiK); + sprintf((char*)cDescriptionBuffer.c_str(), desc.c_str(), uiK, uiK); + size_t pos = cNameBuffer.find_first_of('\0'); + if (pos != std::string::npos) + { + cNameBuffer.resize(pos); + } + + parent.addOption(new Option<T>(cNameBuffer, (storage[uiK]), default_val, cDescriptionBuffer)); + } + + return *this; + } + + template<typename T> + OptionSpecific& + operator()(const std::string& name, T** storage, T default_val, unsigned uiMaxNum, const std::string& desc = "") + { + std::string cNameBuffer; + std::string cDescriptionBuffer; + + for (unsigned int uiK = 0; uiK < uiMaxNum; uiK++) + { + // it needs to be reset when extra digit is added, e.g. number 10 and above + cNameBuffer.resize(name.size() + 10); + cDescriptionBuffer.resize(desc.size() + 10); + + // isn't there are sprintf function for string?? 
+ sprintf((char*)cNameBuffer.c_str(), name.c_str(), uiK, uiK); + sprintf((char*)cDescriptionBuffer.c_str(), desc.c_str(), uiK, uiK); + + size_t pos = cNameBuffer.find_first_of('\0'); + if (pos != std::string::npos) + cNameBuffer.resize(pos); + + parent.addOption(new Option<T>(cNameBuffer, *(storage[uiK]), default_val, cDescriptionBuffer)); + } + + return *this; + } /** * Add option described by name to the parent Options list, * with desc as an optional help description diff --git a/source/Lib/libmd5/MD5.h b/source/Lib/libmd5/MD5.h index 25835bde110b88d353aca3c62499fb1f15f9163e..d41a07424199208d2c75fcdb1e634a955594ee4f 100644 --- a/source/Lib/libmd5/MD5.h +++ b/source/Lib/libmd5/MD5.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/source/Lib/libmd5/libmd5.h b/source/Lib/libmd5/libmd5.h index 3859b049215fd4344d5c760723dda5515482de71..4554c73d5d0235a30a7188719b199af46d5a159f 100644 --- a/source/Lib/libmd5/libmd5.h +++ b/source/Lib/libmd5/libmd5.h @@ -3,7 +3,7 @@ * and contributor rights, including patent rights, and no such rights are * granted under this license. * - * Copyright (c) 2010-2019, ITU/ISO/IEC + * Copyright (c) 2010-2020, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without